Diffstat (limited to 'net')
144 files changed, 7532 insertions, 2227 deletions
diff --git a/net/Kconfig b/net/Kconfig index c2cdbce629bd..7b6cd340b72b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -369,6 +369,7 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" +source "net/strparser/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9bd20bb86cc6..4cafaa2b4667 100644 --- a/net/Makefile +++ b/net/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_AF_KCM) += kcm/ +obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 0e982222d425..3b3b1a292ec8 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1007,7 +1007,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (dev->name == NULL || strncmp(dev->name, "lec", 3)) + if (strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ switch (event) { diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 833bb145ba3c..f20742cbae6d 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -73,10 +73,21 @@ config BATMAN_ADV_MCAST reduce the air overhead while improving the reliability of multicast messages. -config BATMAN_ADV_DEBUG - bool "B.A.T.M.A.N. debugging" +config BATMAN_ADV_DEBUGFS + bool "batman-adv debugfs entries" depends on BATMAN_ADV depends on DEBUG_FS + default y + help + Enable this to export routing related debug tables via debugfs. + The information for each soft-interface and used hard-interface can be + found under batman_adv/ + + If unsure, say Y. + +config BATMAN_ADV_DEBUG + bool "B.A.T.M.A.N. debugging" + depends on BATMAN_ADV_DEBUGFS help This is an option for use by developers; most people should say N here. 
This enables compilation of support for diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index a83fc6c58d19..f724d3c98a81 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -24,14 +24,14 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o -batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += fragmentation.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o -batman-adv-y += icmp_socket.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 81dbbf569bd4..623d04302aa2 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -20,12 +20,18 @@ #include <linux/errno.h> #include <linux/list.h> #include <linux/moduleparam.h> +#include <linux/netlink.h> #include <linux/printk.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/string.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" +#include "netlink.h" char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; @@ -95,6 +101,7 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) return 0; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; @@ -107,6 +114,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { @@ -138,3 +146,65 @@ static struct kparam_string batadv_param_string_ra = { module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); + +/** + * batadv_algo_dump_entry - fill in information about one supported routing + * algorithm + * @msg: netlink message to be sent back + * @portid: Port to reply to + * @seq: Sequence number of message + * @bat_algo_ops: Algorithm to be dumped + * + * Return: Error number, or 0 on success + */ +static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_algo_ops *bat_algo_ops) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_algo_dump - fill in information about supported routing + * algorithms + * @msg: netlink message to be sent back + * @cb: Parameters to the netlink request + * + * Return: Length of reply message. 
+ */ +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_algo_ops *bat_algo_ops; + int skip = cb->args[0]; + int i = 0; + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + if (i++ < skip) + continue; + + if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_algo_ops)) { + i--; + break; + } + } + + cb->args[0] = i; + + return msg->len; +} diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 860d773dd8fa..3b5b69cdd12b 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -22,7 +22,9 @@ #include <linux/types.h> +struct netlink_callback; struct seq_file; +struct sk_buff; extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; @@ -31,5 +33,6 @@ void batadv_algo_init(void); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 19b0abd6c640..e2d18d0b1f06 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/pkt_sched.h> #include <linux/printk.h> #include <linux/random.h> @@ -48,12 +49,17 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bitarray.h" +#include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" @@ -318,17 +324,18 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) if (!orig_node->bat_iv.bcast_own_sum) goto free_orig_node; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) - goto free_orig_node; + goto free_orig_node_hash; return orig_node; -free_orig_node: - /* free twice, as batadv_orig_node_new sets refcount to 2 */ +free_orig_node_hash: batadv_orig_node_put(orig_node); +free_orig_node: batadv_orig_node_put(orig_node); return NULL; @@ -528,36 +535,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { struct net_device *soft_iface; - struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if = NULL; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); - goto out; + return; } soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); if (WARN_ON(!forw_packet->if_outgoing)) - goto out; + return; if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) - goto out; + return; if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) - goto out; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + return; /* only for one specific outgoing interface */ batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); - -out: - if (primary_if) - 
batadv_hardif_put(primary_if); } /** @@ -685,19 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_forw_packet *forw_packet_aggr; unsigned char *skb_buff; unsigned int skb_size; + atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - /* own packet should always be scheduled */ - if (!own_packet) { - if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - return; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv); if (!forw_packet_aggr) - goto out_nomem; + return; if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) @@ -708,8 +697,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) - goto out_free_forw_packet; + if (!forw_packet_aggr->skb) { + batadv_forw_packet_free(forw_packet_aggr); + return; + } + forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; skb_reserve(forw_packet_aggr->skb, ETH_HLEN); @@ -717,12 +709,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); - kref_get(&if_incoming->refcount); - kref_get(&if_outgoing->refcount); forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->if_outgoing = if_outgoing; - forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -741,13 +728,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); - - return; -out_free_forw_packet: - kfree(forw_packet_aggr); -out_nomem: - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); } /* aggregate a new packet into the existing ogm packet */ @@ -1830,10 +1810,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } @@ -1879,6 +1855,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, return NET_RX_SUCCESS; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -1976,8 +1953,239 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif + +/** + * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a + * given outgoing interface. + * @neigh_node: Neighbour of interest + * @if_outgoing: Outgoing interface of interest + * @tq_avg: Pointer of where to store the TQ average + * + * Return: False if no average TQ available, otherwise true. 
+ */ +static bool +batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, + struct batadv_hard_iface *if_outgoing, + u8 *tq_avg) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return false; + + *tq_avg = n_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_put(n_ifinfo); + + return true; +} + +/** + * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + void *hdr; + u8 tq_avg; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg)) + return 0; + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). 
+ * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + u8 tq_avg_best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing, + &tq_avg_best)) + goto out; + + if (tq_avg_best == 0) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq, + bat_priv, if_outgoing, + orig_node, neigh_node, + best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} /** + * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_iv_ogm_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, + &idx, &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/** * batadv_iv_hardif_neigh_print - print a single hop neighbour node * @seq: neighbour table seq_file struct * @hardif_neigh: hardif neighbour information @@ -2027,37 +2235,43 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif 
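/* [Example] The dump callbacks added in this patch all follow the standard
 * generic netlink dump/resume pattern shown verbatim in batadv_algo_dump()
 * and batadv_iv_ogm_orig_dump() above: a dump may need several skbs, so the
 * walker parks its position in cb->args[] and, on the next call, skips the
 * entries it has already emitted. Below is a minimal self-contained sketch
 * of that pattern; the my_* names are hypothetical and not part of
 * batman-adv.
 */
#include <linux/list.h>
#include <net/genetlink.h>
#include <net/netlink.h>

struct my_entry {
	struct list_head list;
	u32 val;
};

static LIST_HEAD(my_list);			/* hypothetical table to dump */
extern struct genl_family my_genl_family;	/* hypothetical genl family */

enum {
	MY_ATTR_UNSPEC,
	MY_ATTR_VAL,				/* u32 */
};

#define MY_CMD_GET_ENTRY 1			/* hypothetical genl command */

static int my_genl_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
	int portid = NETLINK_CB(cb->skb).portid;
	int skip = cb->args[0];		/* entries emitted by earlier calls */
	struct my_entry *e;
	void *hdr;
	int i = 0;

	list_for_each_entry(e, &my_list, list) {
		if (i++ < skip)
			continue;	/* already sent in a previous skb */

		hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
				  &my_genl_family, NLM_F_MULTI,
				  MY_CMD_GET_ENTRY);
		if (!hdr) {
			i--;		/* skb full: retry this entry later */
			break;
		}

		if (nla_put_u32(msg, MY_ATTR_VAL, e->val)) {
			genlmsg_cancel(msg, hdr);
			i--;
			break;
		}

		genlmsg_end(msg, hdr);
	}

	/* resume point for the next call; a call that adds nothing returns
	 * msg->len == 0, which terminates the dump
	 */
	cb->args[0] = i;

	return msg->len;
}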
/** - * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor + * @diff: pointer to integer receiving the calculated difference * - * Return: a value less, equal to or greater than 0 if the metric via neigh1 is - * lower, the same as or higher than the metric via neigh2 + * The content of *@diff is only valid when this function returns true. + * It is less, equal to or greater than 0 if the metric via neigh1 is lower, + * the same as or higher than the metric via neigh2 + * + * Return: true when the difference could be calculated, false otherwise */ -static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2) +static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2, + int *diff) { struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; u8 tq1, tq2; - int diff; + bool ret = true; neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!neigh1_ifinfo || !neigh2_ifinfo) { - diff = 0; + ret = false; goto out; } tq1 = neigh1_ifinfo->bat_iv.tq_avg; tq2 = neigh2_ifinfo->bat_iv.tq_avg; - diff = tq1 - tq2; + *diff = (int)tq1 - (int)tq2; out: if (neigh1_ifinfo) @@ -2065,6 +2279,162 @@ out: if (neigh2_ifinfo) batadv_neigh_ifinfo_put(neigh2_ifinfo); + return ret; +} + +/** + * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to be dumped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface + * into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: Hard interface to dump the neighbours for + * @idx_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). 
+ * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq, + hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dump to this hard interface + */ +static void +batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, + list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + hard_iface, &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + +/** + * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor + * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor + * + * Return: a value less, equal to or greater than 0 if the metric via neigh1 is + * lower, the same as or higher than the metric via neigh2 + */ +static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) +{ + bool ret; + int diff; + + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return 0; + return diff; } @@ -2085,36 +2455,341 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; bool ret; + int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return false; - /* we can't say that the metric is better */ - if (!neigh1_ifinfo || !neigh2_ifinfo) { - ret = false; + ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD; + return ret; +} + +static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* begin scheduling originator messages on that interface */ + batadv_iv_ogm_schedule(hard_iface); +} + +static struct batadv_gw_node * +batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{
+ struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u64 max_gw_factor = 0; + u64 tmp_gw_factor = 0; + u8 max_tq = 0; + u8 tq_avg; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + continue; + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + + if (!kref_get_unless_zero(&gw_node->refcount)) + goto next; + + tq_avg = router_ifinfo->bat_iv.tq_avg; + + switch (atomic_read(&bat_priv->gw.sel_class)) { + case 1: /* fast connection */ + tmp_gw_factor = tq_avg * tq_avg; + tmp_gw_factor *= gw_node->bandwidth_down; + tmp_gw_factor *= 100 * 100; + tmp_gw_factor >>= 18; + + if ((tmp_gw_factor > max_gw_factor) || + ((tmp_gw_factor == max_gw_factor) && + (tq_avg > max_tq))) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + + default: /* 2: stable connection (use best statistic) + * 3: fast-switch (use best statistic but change as + * soon as a better gateway appears) + * XX: late-switch (use best statistic but change as + * soon as a better gateway appears which has + * $routing_class more tq points) + */ + if (tq_avg > max_tq) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + } + + if (tq_avg > max_tq) + max_tq = tq_avg; + + if (tmp_gw_factor > max_gw_factor) + max_gw_factor = tmp_gw_factor; + + batadv_gw_node_put(gw_node); + +next: + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + } + rcu_read_unlock(); + + return curr_gw; +} + +static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL; + struct batadv_neigh_node *router_gw = NULL; + struct batadv_neigh_node *router_orig = NULL; + u8 gw_tq_avg, orig_tq_avg; + bool ret = false; + + /* dynamic re-election is performed only on fast or late switch */ + if (atomic_read(&bat_priv->gw.sel_class) <= 2) + return false; + + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); + if (!router_gw) { + ret = true; goto out; } - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; + router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw, + BATADV_IF_DEFAULT); + if (!router_gw_ifinfo) { + ret = true; + goto out; + } + + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router_orig) + goto out; + + router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_ifinfo) + goto out; + + gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg; + orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg; + + /* the TQ value has to be better */ + if (orig_tq_avg < gw_tq_avg) + goto out; + /* if the routing class is greater than 3 the value tells us how much + * greater the TQ value of the new gateway must be + */ + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (tq curr: 
%i, tq new: %i)\n", + gw_tq_avg, orig_tq_avg); + + ret = true; out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + if (router_gw_ifinfo) + batadv_neigh_ifinfo_put(router_gw_ifinfo); + if (router_orig_ifinfo) + batadv_neigh_ifinfo_put(router_orig_ifinfo); + if (router_gw) + batadv_neigh_node_put(router_gw); + if (router_orig) + batadv_neigh_node_put(router_orig); return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/* fails if orig_node has no router */ +static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) { - /* begin scheduling originator messages on that interface */ - batadv_iv_ogm_schedule(hard_iface); + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_iv.tq_avg, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +static void batadv_iv_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} +#endif + +/** + * batadv_iv_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + 
genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) || + nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, + router->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, + gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_iv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; } static struct batadv_algo_ops batadv_batman_iv __read_mostly = { @@ -2129,14 +2804,28 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .neigh = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_neigh_print, +#endif + .dump = batadv_iv_ogm_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_ogm_orig_print, +#endif + .dump = batadv_iv_ogm_orig_dump, .free = batadv_iv_ogm_orig_free, .add_if = batadv_iv_ogm_orig_add_if, .del_if = batadv_iv_ogm_orig_del_if, }, + .gw = { + .get_best_gw_node = batadv_iv_gw_get_best_gw_node, + .is_eligible = batadv_iv_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + .print = batadv_iv_gw_print, +#endif + .dump = batadv_iv_gw_dump, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0366cbf5e444..e79f6f01182e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -21,24 +21,38 @@ #include <linux/atomic.h> #include <linux/bug.h> #include <linux/cache.h> +#include <linux/errno.h> +#include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/kref.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +struct sk_buff; + static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -115,6 +129,7 @@ batadv_v_hardif_neigh_init(struct 
batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_v_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -198,8 +213,142 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif + +/** + * batadv_v_neigh_dump_neigh - Dump a neighbour into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to dump + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + u32 throughput; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); + throughput = throughput * 100; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} /** + * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into + * a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: The hard interface to be dumped + * @idx_s: Entries to be skipped + * + * This function assumes the caller holds rcu_read_lock(). 
+ * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a + * message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dumping to this hard interface + */ +static void +batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, hard_iface, + &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/** * batadv_v_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct @@ -265,6 +414,205 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif + +/** + * batadv_v_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + unsigned int last_seen_msecs; + u32 throughput; + void *hdr; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return 0; + + throughput = n_ifinfo->bat_v.throughput * 100; + + batadv_neigh_ifinfo_put(n_ifinfo); + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || + nla_put(msg, 
BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + neigh_node, best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_v_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_v_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct 
hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_v_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, &idx, + &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, @@ -320,6 +668,365 @@ err_ifinfo1: return ret; } +static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, + char *buff, size_t count) +{ + u32 old_class, class; + + if (!batadv_parse_throughput(bat_priv->soft_iface, buff, + "B.A.T.M.A.N. V GW selection class", + &class)) + return -EINVAL; + + old_class = atomic_read(&bat_priv->gw.sel_class); + atomic_set(&bat_priv->gw.sel_class, class); + + if (old_class != class) + batadv_gw_reselect(bat_priv); + + return count; +} + +static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) +{ + u32 class = atomic_read(&bat_priv->gw.sel_class); + + return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); +} + +/** + * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW + * @gw_node: the GW to retrieve the metric for + * @bw: the pointer where the metric will be stored. The metric is computed as + * the minimum between the GW advertised throughput and the path throughput to + * it in the mesh + * + * Return: 0 on success, -1 on failure + */ +static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_orig_node *orig_node; + struct batadv_neigh_node *router; + int ret = -1; + + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + /* the GW metric is computed as the minimum between the path throughput + * to reach the GW itself and the advertised bandwidth. 
+ * This gives us an approximation of the effective throughput that the + * client can expect via this particular GW node + */ + *bw = router_ifinfo->bat_v.throughput; + *bw = min_t(u32, *bw, gw_node->bandwidth_down); + + ret = 0; +out: + if (router) + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + + return ret; +} + +/** + * batadv_v_gw_get_best_gw_node - retrieve the best GW node + * @bat_priv: the bat priv with all the soft interface information + * + * Return: the GW node having the best GW-metric, NULL if no GW is known + */ +static struct batadv_gw_node * +batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u32 max_bw = 0, bw; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (!kref_get_unless_zero(&gw_node->refcount)) + continue; + + if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) + goto next; + + if (curr_gw && (bw <= max_bw)) + goto next; + + if (curr_gw) + batadv_gw_node_put(curr_gw); + + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + max_bw = bw; + +next: + batadv_gw_node_put(gw_node); + } + rcu_read_unlock(); + + return curr_gw; +} + +/** + * batadv_v_gw_is_eligible - check if an originator would be selected as GW + * @bat_priv: the bat priv with all the soft interface information + * @curr_gw_orig: originator representing the currently selected GW + * @orig_node: the originator representing the new candidate + * + * Return: true if orig_node can be selected as current GW, false otherwise + */ +static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; + u32 gw_throughput, orig_throughput, threshold; + bool ret = false; + + threshold = atomic_read(&bat_priv->gw.sel_class); + + curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); + if (!curr_gw) { + ret = true; + goto out; + } + + if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { + ret = true; + goto out; + } + + orig_gw = batadv_gw_node_get(bat_priv, orig_node); + if (!orig_gw) + goto out; + + if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) + goto out; + + if (orig_throughput < gw_throughput) + goto out; + + if ((orig_throughput - gw_throughput) < threshold) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", + gw_throughput, orig_throughput); + + ret = true; +out: + if (curr_gw) + batadv_gw_node_put(curr_gw); + if (orig_gw) + batadv_gw_node_put(orig_gw); + + return ret; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/* fails if orig_node has no router */ +static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ?
"=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_v.throughput / 10, + router_ifinfo->bat_v.throughput % 10, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_print - print the gateway list + * @bat_priv: the bat priv with all the soft interface information + * @seq: gateway table seq_file struct + */ +static void batadv_v_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} +#endif + +/** + * batadv_v_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) { + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, + router_ifinfo->bat_v.throughput)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * 
@bat_priv: The bat priv with all the soft interface information + */ +static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -333,10 +1040,26 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_neigh_print, +#endif + .dump = batadv_v_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_orig_print, +#endif + .dump = batadv_v_orig_dump, + }, + .gw = { + .store_sel_class = batadv_v_store_sel_class, + .show_sel_class = batadv_v_show_sel_class, + .get_best_gw_node = batadv_v_gw_get_best_gw_node, + .is_eligible = batadv_v_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + .print = batadv_v_gw_print, +#endif + .dump = batadv_v_gw_dump, }, }; @@ -363,7 +1086,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { - return batadv_v_ogm_init(bat_priv); + int ret = 0; + + ret = batadv_v_ogm_init(bat_priv); + if (ret < 0) + return ret; + + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); + + return 0; } /** diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 6fbba4eb0617..1aeeadca620c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -73,13 +73,12 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, if (!orig_node) return NULL; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) { - /* orig_node->refcounter is initialised to 2 by - * batadv_orig_node_new() - */ + /* remove refcnt for newly created orig_node and hash entry */ batadv_orig_node_put(orig_node); batadv_orig_node_put(orig_node); orig_node = NULL; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ad2ffe16d29f..e7f690b571ea 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -45,12 +46,18 @@ #include <linux/string.h> #include <linux/workqueue.h> #include <net/arp.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -519,11 +526,9 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, atomic_set(&entry->wait_periods, 0); 
ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); - - /* one for the hash, one for returning */ kref_init(&entry->refcount); - kref_get(&entry->refcount); + kref_get(&entry->refcount); hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, batadv_choose_backbone_gw, entry, @@ -711,12 +716,13 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->lasttime = jiffies; kref_get(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; - kref_init(&claim->refcount); - kref_get(&claim->refcount); + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, BATADV_PRINT_VID(vid)); + + kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -1148,7 +1154,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, /* Let the loopdetect frames on the mesh in any case. */ if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT) - return 0; + return false; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, @@ -1990,6 +1996,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file * @seq: seq file to print on @@ -2050,8 +2057,172 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_bla_claim_dump_entry - dump one entry of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @claim: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_claim *claim) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(claim->backbone_gw->orig, + primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) || + nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + claim->backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_claim_dump_bucket - dump one bucket of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. 
+ */ +static int +batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_claim *claim; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(claim, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_claim_dump_entry(msg, portid, seq, + primary_if, claim)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} /** + * batadv_bla_claim_dump - dump claim table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.claim_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_claim_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/** * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq * file * @seq: seq file to print on @@ -2114,3 +2285,168 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_bla_backbone_dump_entry - dump one entry of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @backbone_gw: entry to dump + * + * Return: 0 or error code. 
+ */ +static int +batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_backbone_gw *backbone_gw) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + int msecs; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(backbone_gw->orig, primary_addr); + + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_backbone_gw *backbone_gw; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_backbone_dump_entry(msg, portid, seq, + primary_if, backbone_gw)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_backbone_dump - dump backbone table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. 
+ */ +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.backbone_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_backbone_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 0f01daeb359e..1ae93e46fb98 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,6 +23,7 @@ #include <linux/types.h> struct net_device; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -35,8 +36,10 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, @@ -47,7 +50,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); - +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -112,6 +115,18 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv) { } +static inline int batadv_bla_claim_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +static inline int batadv_bla_backbone_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 1d68b6e63b96..b4ffba7dd583 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include <linux/stddef.h> #include <linux/stringify.h> #include <linux/sysfs.h> +#include <net/net_namespace.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void) */ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) { + 
struct net *net = dev_net(hard_iface->net_dev); struct batadv_debuginfo **bat_debug; struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, batadv_debugfs); if (!hard_iface->debug_dir) @@ -341,6 +346,11 @@ out: */ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); + + if (net != &init_net) + return; + if (batadv_debugfs) { debugfs_remove_recursive(hard_iface->debug_dir); hard_iface->debug_dir = NULL; @@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_debuginfo **bat_debug; + struct net *net = dev_net(dev); struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs); if (!bat_priv->debug_dir) goto out; @@ -392,6 +406,10 @@ out: void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct net *net = dev_net(dev); + + if (net != &init_net) + return; batadv_debug_log_cleanup(bat_priv); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 1ab4e2e63afc..c68ff3dcb926 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -26,7 +26,7 @@ struct net_device; #define BATADV_DEBUGFS_SUBDIR "batman_adv" -#if IS_ENABLED(CONFIG_DEBUG_FS) +#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS) void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b1cc8bfe11ac..e257efdc5d03 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -343,8 +343,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; kref_init(&dat_entry->refcount); - kref_get(&dat_entry->refcount); + kref_get(&dat_entry->refcount); hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, batadv_hash_dat, dat_entry, &dat_entry->hash_entry); @@ -795,6 +795,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv) batadv_dat_hash_free(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_dat_cache_seq_print_text - print the local DAT hash table * @seq: seq file to print on @@ -846,6 +847,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_arp_get_type - parse an ARP packet and gets the type diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 63a805d3f96e..de055d64debe 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> @@ -31,6 +32,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -39,13 +41,17 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/udp.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "routing.h" +#include "soft-interface.h" #include 
"sysfs.h" #include "translation-table.h" @@ -80,12 +86,12 @@ static void batadv_gw_node_release(struct kref *ref) * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it * @gw_node: gateway node to free */ -static void batadv_gw_node_put(struct batadv_gw_node *gw_node) +void batadv_gw_node_put(struct batadv_gw_node *gw_node) { kref_put(&gw_node->refcount, batadv_gw_node_release); } -static struct batadv_gw_node * +struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node; @@ -164,86 +170,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) atomic_set(&bat_priv->gw.reselect, 1); } -static struct batadv_gw_node * -batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) -{ - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo; - struct batadv_gw_node *gw_node, *curr_gw = NULL; - u64 max_gw_factor = 0; - u64 tmp_gw_factor = 0; - u8 max_tq = 0; - u8 tq_avg; - struct batadv_orig_node *orig_node; - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - orig_node = gw_node->orig_node; - router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router) - continue; - - router_ifinfo = batadv_neigh_ifinfo_get(router, - BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto next; - - if (!kref_get_unless_zero(&gw_node->refcount)) - goto next; - - tq_avg = router_ifinfo->bat_iv.tq_avg; - - switch (atomic_read(&bat_priv->gw.sel_class)) { - case 1: /* fast connection */ - tmp_gw_factor = tq_avg * tq_avg; - tmp_gw_factor *= gw_node->bandwidth_down; - tmp_gw_factor *= 100 * 100; - tmp_gw_factor >>= 18; - - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - - default: /* 2: stable connection (use best statistic) - * 3: fast-switch (use best statistic but change as - * soon as a better gateway appears) - * XX: late-switch (use best statistic but change as - * soon as a better gateway appears which has - * $routing_class more tq points) - */ - if (tq_avg > max_tq) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - } - - if (tq_avg > max_tq) - max_tq = tq_avg; - - if (tmp_gw_factor > max_gw_factor) - max_gw_factor = tmp_gw_factor; - - batadv_gw_node_put(gw_node); - -next: - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - } - rcu_read_unlock(); - - return curr_gw; -} - /** * batadv_gw_check_client_stop - check if client mode has been switched off * @bat_priv: the bat priv with all the soft interface information @@ -287,12 +213,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; + if (!bat_priv->algo_ops->gw.get_best_gw_node) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) goto out; - next_gw = batadv_gw_get_best_gw_node(bat_priv); + /* if gw.reselect is set to 1 it means that a previous call to + * gw.is_eligible() said that we have a new best GW, therefore it can + * now be picked from the list and selected + */ + next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv); if (curr_gw == next_gw) goto out; @@ -360,70 +293,31 @@ out: void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node 
*orig_node) { - struct batadv_neigh_ifinfo *router_orig_tq = NULL; - struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; - struct batadv_neigh_node *router_gw = NULL; - struct batadv_neigh_node *router_orig = NULL; - u8 gw_tq_avg, orig_tq_avg; + + /* abort immediately if the routing algorithm does not support gateway + * election + */ + if (!bat_priv->algo_ops->gw.is_eligible) + return; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) goto reselect; - router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); - if (!router_gw) - goto reselect; - - router_gw_tq = batadv_neigh_ifinfo_get(router_gw, - BATADV_IF_DEFAULT); - if (!router_gw_tq) - goto reselect; - /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router_orig) + if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig, + orig_node)) goto out; - router_orig_tq = batadv_neigh_ifinfo_get(router_orig, - BATADV_IF_DEFAULT); - if (!router_orig_tq) - goto out; - - gw_tq_avg = router_gw_tq->bat_iv.tq_avg; - orig_tq_avg = router_orig_tq->bat_iv.tq_avg; - - /* the TQ value has to be better */ - if (orig_tq_avg < gw_tq_avg) - goto out; - - /* if the routing class is greater than 3 the value tells us how much - * greater the TQ value of the new gateway must be - */ - if ((atomic_read(&bat_priv->gw.sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) - goto out; - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", - gw_tq_avg, orig_tq_avg); - reselect: batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_put(curr_gw_orig); - if (router_gw) - batadv_neigh_node_put(router_gw); - if (router_orig) - batadv_neigh_node_put(router_orig); - if (router_gw_tq) - batadv_neigh_ifinfo_put(router_gw_tq); - if (router_orig_tq) - batadv_neigh_ifinfo_put(router_orig_tq); } /** @@ -445,14 +339,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (!gw_node) return; - kref_get(&orig_node->refcount); + kref_init(&gw_node->refcount); INIT_HLIST_NODE(&gw_node->list); + kref_get(&orig_node->refcount); gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - kref_init(&gw_node->refcount); spin_lock_bh(&bat_priv->gw.list_lock); + kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); spin_unlock_bh(&bat_priv->gw.list_lock); @@ -463,6 +358,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, ntohl(gateway->bandwidth_down) % 10, ntohl(gateway->bandwidth_up) / 10, ntohl(gateway->bandwidth_up) % 10); + + /* don't return reference to new gw_node */ + batadv_gw_node_put(gw_node); } /** @@ -472,9 +370,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, * * Return: gateway node if found or NULL otherwise. 
*/ -static struct batadv_gw_node * -batadv_gw_node_get(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node) +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) { struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; @@ -585,81 +482,87 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } -/* fails if orig_node has no router */ -static int batadv_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, primary_if->net_dev->name, + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->algo_ops->name); - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + batadv_hardif_put(primary_if); - seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? -1 : 0; + if (!bat_priv->algo_ops->gw.print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; + } - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; + bat_priv->algo_ops->gw.print(bat_priv, seq); + + return 0; } +#endif -int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) +/** + * batadv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * + * Return: Error code, or length of message + */ +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - struct batadv_gw_node *gw_node; - int gw_count = 0; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) + struct batadv_hard_iface *primary_if = NULL; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + int ret; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; goto out; + } - seq_printf(seq, - " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. 
adv %s, MainIF/MAC: %s/%pM (%s)]\n", - BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name); + bat_priv = netdev_priv(soft_iface); - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - /* fails if orig_node has no router */ - if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } - gw_count++; + if (!bat_priv->algo_ops->gw.dump) { + ret = -EOPNOTSUPP; + goto out; } - rcu_read_unlock(); - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); + bat_priv->algo_ops->gw.dump(msg, cb, bat_priv); + + ret = msg->len; out: if (primary_if) batadv_hardif_put(primary_if); - return 0; + if (soft_iface) + dev_put(soft_iface); + + return ret; } /** diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 582dd8c413c8..859166d03561 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -23,6 +23,7 @@ #include <linux/types.h> struct batadv_tvlv_gateway_data; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -39,10 +40,16 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_free(struct batadv_priv *bat_priv); +void batadv_gw_node_put(struct batadv_gw_node *gw_node); +struct batadv_gw_node * +batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, u8 *chaddr); +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index d7bc6a87bcc9..21184810d89f 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); - /* restart gateway selection if fast or late switching was enabled */ + /* restart gateway selection */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw.sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1f9080840566..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,7 +35,8 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/workqueue.h> +#include <net/net_namespace.h> +#include <net/rtnetlink.h> #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -85,25 +86,55 @@ out: } /** + * batadv_getlink_net - return link net namespace (or use fallback) + * @netdev: net_device to check + * @fallback_net: returned in case get_link_net is not available for @netdev + * + * Return: result of rtnl_link_ops->get_link_net or @fallback_net + */ +static const struct net
*batadv_getlink_net(const struct net_device *netdev, + const struct net *fallback_net) +{ + if (!netdev->rtnl_link_ops) + return fallback_net; + + if (!netdev->rtnl_link_ops->get_link_net) + return fallback_net; + + return netdev->rtnl_link_ops->get_link_net(netdev); +} + +/** * batadv_mutual_parents - check if two devices are each others parent - * @dev1: 1st net_device - * @dev2: 2nd net_device + * @dev1: 1st net dev + * @net1: 1st devices netns + * @dev2: 2nd net dev + * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, - const struct net_device *dev2) + const struct net *net1, + const struct net_device *dev2, + const struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); + const struct net *dev1_parent_net; + const struct net *dev2_parent_net; + + dev1_parent_net = batadv_getlink_net(dev1, net1); + dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && - (dev2_parent_iflink == dev1->ifindex); + (dev2_parent_iflink == dev1->ifindex) && + net_eq(dev1_parent_net, net2) && + net_eq(dev2_parent_net, net1); } /** @@ -121,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1, */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { - struct net_device *parent_dev; struct net *net = dev_net(net_dev); + struct net_device *parent_dev; + const struct net *parent_net; bool ret; /* check if this is a batman-adv mesh interface */ @@ -134,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) dev_get_iflink(net_dev) == net_dev->ifindex) return false; + parent_net = batadv_getlink_net(net_dev, net); + /* recurse over the parent device */ - parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, + dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; - if (batadv_mutual_parents(net_dev, parent_dev)) + if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); @@ -625,25 +660,6 @@ out: batadv_hardif_put(primary_if); } -/** - * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif - * @work: work queue item - * - * Free the parts of the hard interface which can not be removed under - * rtnl lock (to prevent deadlock situations). 
- */ -static void batadv_hardif_remove_interface_finish(struct work_struct *work) -{ - struct batadv_hard_iface *hard_iface; - - hard_iface = container_of(work, struct batadv_hard_iface, - cleanup_work); - - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_put(hard_iface); -} - static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -676,10 +692,9 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); - INIT_WORK(&hard_iface->cleanup_work, - batadv_hardif_remove_interface_finish); spin_lock_init(&hard_iface->neigh_list_lock); + kref_init(&hard_iface->refcount); hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; if (batadv_is_wifi_netdev(net_dev)) @@ -687,11 +702,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_v_hardif_init(hard_iface); - /* extra reference for return */ - kref_init(&hard_iface->refcount); - kref_get(&hard_iface->refcount); - batadv_check_known_mac_addr(hard_iface->net_dev); + kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); return hard_iface; @@ -713,13 +725,15 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_AUTO); + BATADV_IF_CLEANUP_KEEP); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); + batadv_debugfs_del_hardif(hard_iface); + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_put(hard_iface); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 618d5de06f20..e44a7da51431 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -26,9 +26,25 @@ struct batadv_icmp_header; #define BATADV_ICMP_SOCKET "socket" -void batadv_socket_init(void); int batadv_socket_setup(struct batadv_priv *bat_priv); + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + +void batadv_socket_init(void); void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len); +#else + +static inline void batadv_socket_init(void) +{ +} + +static inline void +batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len) +{ +} + +#endif + #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fe4c5e29f96b..2c017ab47557 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { + int ret; + + ret = batadv_tt_cache_init(); + if (ret < 0) + return ret; + INIT_LIST_HEAD(&batadv_hardif_list); batadv_algo_init(); @@ -93,9 +99,8 @@ static int __init batadv_init(void) batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); - if (!batadv_event_workqueue) - return -ENOMEM; + goto err_create_wq; batadv_socket_init(); batadv_debugfs_init(); @@ -108,6 +113,11 @@ static int __init batadv_init(void) BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); return 0; + +err_create_wq: + batadv_tt_cache_destroy(); + + return -ENOMEM; } static void __exit batadv_exit(void) @@ -123,6 +133,8 @@ static void __exit batadv_exit(void) batadv_event_workqueue = NULL; rcu_barrier(); 
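A pattern worth calling out, since it repeats across the backbone_gw, claim, dat_entry, gw_node, hard_iface, nc_node and orig_node hunks in this series: kref_init() now stands for the creator's (or returned) reference, and every structure that publishes the object takes its own explicit kref_get() immediately before the insertion, instead of a blanket kref_get() right after init. A compact sketch of that convention, with invented demo_* names and the usual RCU-deferred free assumed:

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct demo_node {
	struct kref refcount;
	struct rcu_head rcu;
	struct hlist_node list;
};

static void demo_node_release(struct kref *ref)
{
	struct demo_node *node = container_of(ref, struct demo_node, refcount);

	/* defer the free until concurrent RCU readers are done */
	kfree_rcu(node, rcu);
}

static void demo_node_put(struct demo_node *node)
{
	kref_put(&node->refcount, demo_node_release);
}

static struct demo_node *demo_node_new(struct hlist_head *head)
{
	struct demo_node *node;

	node = kzalloc(sizeof(*node), GFP_ATOMIC);
	if (!node)
		return NULL;

	/* reference owned by the caller */
	kref_init(&node->refcount);

	/* the list publishing the object takes its own reference
	 * right before the insertion
	 */
	kref_get(&node->refcount);
	hlist_add_head_rcu(&node->list, head);

	return node;	/* released again via demo_node_put() */
}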
+ + batadv_tt_cache_destroy(); } int batadv_mesh_init(struct net_device *soft_iface) @@ -270,6 +282,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) return is_my_mac; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_seq_print_text_primary_if_get - called from debugfs table printing * function that requires the primary interface @@ -305,6 +318,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq) out: return primary_if; } +#endif /** * batadv_max_header_len - calculate maximum encapsulation overhead for a @@ -638,3 +652,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); MODULE_VERSION(BATADV_SOURCE_VERSION); +MODULE_ALIAS_RTNL_LINK("batadv"); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 06a860845434..09af21e27639 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.3" +#define BATADV_SOURCE_VERSION "2016.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cc915073a753..13661f43386f 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -528,7 +528,7 @@ update: } return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); + (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); } /** @@ -1134,6 +1134,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) BATADV_TVLV_HANDLER_OGM_CIFNOTFND); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_mcast_flags_print_header - print own mcast flags to debugfs table * @bat_priv: the bat priv with all the soft interface information @@ -1234,6 +1235,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif /** * batadv_mcast_free - free the multicast optimizations structures diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 231f8eaf075b..18831e72b0fb 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -18,6 +18,8 @@ #include "netlink.h" #include "main.h" +#include <linux/atomic.h> +#include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/genetlink.h> @@ -26,24 +28,33 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/types.h> #include <net/genetlink.h> #include <net/netlink.h> +#include <net/sock.h> #include <uapi/linux/batman_adv.h> +#include "bat_algo.h" +#include "bridge_loop_avoidance.h" +#include "gateway_client.h" #include "hard-interface.h" +#include "originator.h" +#include "packet.h" #include "soft-interface.h" #include "tp_meter.h" +#include "translation-table.h" -struct sk_buff; - -static struct genl_family batadv_netlink_family = { +struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .netnsok = true, }; /* multicast groups */ @@ -69,9 +80,44 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, + [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, + [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TT_TTVN] 
= { .type = NLA_U8 }, + [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, + [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, + [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, + [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TQ] = { .type = NLA_U8 }, + [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, + [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, }; /** + * batadv_netlink_get_ifindex - Extract an interface index from a message + * @nlh: Message header + * @attrtype: Attribute which holds an interface index + * + * Return: interface index, or 0. + */ +int +batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) +{ + struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); + + return attr ? nla_get_u32(attr) : 0; +} + +/** * batadv_netlink_mesh_info_put - fill in generic information about mesh * interface * @msg: netlink message to be sent back @@ -93,8 +139,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr)) + soft_iface->dev_addr) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, + (u8)atomic_read(&bat_priv->tt.vn))) + goto out; + +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + ntohs(bat_priv->bla.claim_dest.group))) goto out; +#endif primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { @@ -380,6 +434,106 @@ out: return ret; } +/** + * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hard_iface: Hard interface to dump + * + * Return: error code, or 0 on success + */ +static int +batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *hard_iface) +{ + struct net_device *net_dev = hard_iface->net_dev; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_HARDIFS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + net_dev->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + net_dev->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + net_dev->dev_addr)) + goto nla_put_failure; + + if (hard_iface->if_status == BATADV_IF_ACTIVE) { + if (nla_put_flag(msg, BATADV_ATTR_ACTIVE)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_dump_hardifs - Dump all hard interfaces into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: error code, or length of reply message on success + */ +static int +batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net =
sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hard_iface *hard_iface; + int ifindex; + int portid = NETLINK_CB(cb->skb).portid; + int seq = cb->nlh->nlmsg_seq; + int skip = cb->args[0]; + int i = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return -ENODEV; + + if (!batadv_softif_is_valid(soft_iface)) { + dev_put(soft_iface); + return -ENODEV; + } + + rcu_read_lock(); + + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + if (i++ < skip) + continue; + + if (batadv_netlink_dump_hardif_entry(msg, portid, seq, + hard_iface)) { + i--; + break; + } + } + + rcu_read_unlock(); + + dev_put(soft_iface); + + cb->args[0] = i; + + return msg->len; +} + static struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, @@ -399,6 +553,61 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, }, + { + .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_algo_dump, + }, + { + .cmd = BATADV_CMD_GET_HARDIFS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_netlink_dump_hardifs, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_local_dump, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_global_dump, + }, + { + .cmd = BATADV_CMD_GET_ORIGINATORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_orig_dump, + }, + { + .cmd = BATADV_CMD_GET_NEIGHBORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_hardif_neigh_dump, + }, + { + .cmd = BATADV_CMD_GET_GATEWAYS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_gw_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_CLAIM, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_claim_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_backbone_dump, + }, + }; /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 945653ab58c6..52eb16281aba 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -21,12 +21,18 @@ #include "main.h" #include <linux/types.h> +#include <net/genetlink.h> + +struct nlmsghdr; void batadv_netlink_register(void); void batadv_netlink_unregister(void); +int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype); int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +extern struct genl_family batadv_netlink_family; + #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 293ef4ffd4e1..e3baf697a35c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -856,14 +856,12 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, if (!nc_node) return NULL; - kref_get(&orig_neigh_node->refcount); - /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); + kref_init(&nc_node->refcount); ether_addr_copy(nc_node->addr, orig_node->orig); + 
kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node; - kref_init(&nc_node->refcount); - kref_get(&nc_node->refcount); /* Select ingoing or outgoing coding node */ if (in_coding) { @@ -879,6 +877,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, /* Add nc_node to orig_node */ spin_lock_bh(lock); + kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); spin_unlock_bh(lock); @@ -979,7 +978,6 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, INIT_LIST_HEAD(&nc_path->packet_list); spin_lock_init(&nc_path->packet_list_lock); kref_init(&nc_path->refcount); - kref_get(&nc_path->refcount); nc_path->last_valid = jiffies; ether_addr_copy(nc_path->next_hop, dst); ether_addr_copy(nc_path->prev_hop, src); @@ -989,6 +987,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, nc_path->next_hop); /* Add nc_path to hash table */ + kref_get(&nc_path->refcount); hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, batadv_nc_hash_choose, &nc_path_key, &nc_path->hash_entry); @@ -1882,6 +1881,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) batadv_hash_destroy(bat_priv->nc.decoding_hash); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_nc_nodes_seq_print_text - print the nc node information * @seq: seq file to print on @@ -1981,3 +1981,4 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) out: return -ENOMEM; } +#endif diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 3940b5d24421..5f3bfc41aeb1 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,11 +28,15 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "distributed-arp-table.h" @@ -42,8 +46,10 @@ #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "routing.h" +#include "soft-interface.h" #include "translation-table.h" /* hash class keys */ @@ -127,9 +133,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, goto out; kref_init(&vlan->refcount); - kref_get(&vlan->refcount); vlan->vid = vid; + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); out: @@ -380,6 +386,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, orig_ifinfo->if_outgoing = if_outgoing; INIT_HLIST_NODE(&orig_ifinfo->list); kref_init(&orig_ifinfo->refcount); + kref_get(&orig_ifinfo->refcount); hlist_add_head_rcu(&orig_ifinfo->list, &orig_node->ifinfo_list); @@ -453,9 +460,9 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, INIT_HLIST_NODE(&neigh_ifinfo->list); kref_init(&neigh_ifinfo->refcount); - kref_get(&neigh_ifinfo->refcount); neigh_ifinfo->if_outgoing = if_outgoing; + kref_get(&neigh_ifinfo->refcount); hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); out: @@ -647,8 +654,8 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, /* extra reference for return */ kref_init(&neigh_node->refcount); - kref_get(&neigh_node->refcount); + kref_get(&neigh_node->refcount); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, @@ -686,6 +693,7 @@ batadv_neigh_node_get_or_create(struct 
batadv_orig_node *orig_node, return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list * @seq: neighbour table seq_file struct @@ -719,6 +727,84 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) bat_priv->algo_ops->neigh.print(bat_priv, seq); return 0; } +#endif + +/** + * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->neigh.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} /** * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for @@ -905,7 +991,6 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, /* extra reference for return */ kref_init(&orig_node->refcount); - kref_get(&orig_node->refcount); orig_node->bat_priv = bat_priv; ether_addr_copy(orig_node->orig, addr); @@ -1256,6 +1341,7 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) _batadv_purge_orig(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1329,6 +1415,84 @@ out: batadv_hardif_put(hard_iface); return 0; } +#endif + +/** + * batadv_orig_dump - Dump to netlink the originator infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + 
return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->orig.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 566306bf05dc..ebc56183f358 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,9 @@ #include "hash.h" +struct netlink_callback; struct seq_file; +struct sk_buff; bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); @@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_ifinfo * @@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 6b011ff64dd8..6afc0b86950e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -129,42 +129,6 @@ enum batadv_tt_data_flags { }; /** - * enum batadv_tt_client_flags - TT client specific flags - * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table - * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new - * update telling its new real location has not been received/sent yet - * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. - * This information is used by the "AP Isolation" feature - * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". 
This - * information is used by the Extended Isolation feature - * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table - * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has - * not been announced yet - * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept - * in the table for one more originator interval for consistency purposes - * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of - * the network but no nnode has already announced it - * - * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. - * Bits from 8 to 15 are called _local flags_ because they are used for local - * computations only. - * - * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with - * the other nodes in the network. To achieve this goal these flags are included - * in the TT CRC computation. - */ -enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = BIT(0), - BATADV_TT_CLIENT_ROAM = BIT(1), - BATADV_TT_CLIENT_WIFI = BIT(4), - BATADV_TT_CLIENT_ISOLA = BIT(5), - BATADV_TT_CLIENT_NOPURGE = BIT(8), - BATADV_TT_CLIENT_NEW = BIT(9), - BATADV_TT_CLIENT_PENDING = BIT(10), - BATADV_TT_CLIENT_TEMP = BIT(11), -}; - -/** * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c001e92b..610f2c45edcd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if (!orig_ifinfo) return; - rcu_read_lock(); - curr_router = rcu_dereference(orig_ifinfo->router); - if (curr_router && !kref_get_unless_zero(&curr_router->refcount)) - curr_router = NULL; - rcu_read_unlock(); + spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + + /* increase refcount of new best neighbor */ + if (neigh_node) + kref_get(&neigh_node->refcount); + + rcu_assign_pointer(orig_ifinfo->router, neigh_node); + spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, curr_router->addr); } - if (curr_router) - batadv_neigh_node_put(curr_router); - - spin_lock_bh(&orig_node->neigh_list_lock); - /* curr_router used earlier may not be the current orig_ifinfo->router - * anymore because it was dereferenced outside of the neigh_list_lock - * protected region. After the new best neighbor has replace the current - * best neighbor the reference counter needs to decrease. Consequently, - * the code needs to ensure the curr_router variable contains a pointer - * to the replaced best neighbor. 
- */ - curr_router = rcu_dereference_protected(orig_ifinfo->router, true); - - /* increase refcount of new best neighbor */ - if (neigh_node) - kref_get(&neigh_node->refcount); - - rcu_assign_pointer(orig_ifinfo->router, neigh_node); - spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_orig_ifinfo_put(orig_ifinfo); - /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_put(curr_router); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 6191159484df..8d4e1f578574 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -315,8 +315,7 @@ out: * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied - * as packet_type. Then send this frame to the given orig_node and release a - * reference to this orig_node. + * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ @@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, ret = NET_XMIT_SUCCESS; out: - if (orig_node) - batadv_orig_node_put(orig_node); if (ret == NET_XMIT_DROP) kfree_skb(skb); return ret; @@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; + int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; @@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - return batadv_send_skb_unicast(bat_priv, skb, packet_type, - packet_subtype, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, + packet_subtype, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } /** @@ -433,12 +436,25 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; + int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); - return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, - BATADV_P_DATA, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, + BATADV_P_DATA, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } +/** + * batadv_forw_packet_free - free a forwarding packet + * @forw_packet: The packet to free + * + * This frees a forwarding packet and releases any resources it might + * have claimed. + */ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); @@ -446,9 +462,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) batadv_hardif_put(forw_packet->if_outgoing); + if (forw_packet->queue_left) + atomic_inc(forw_packet->queue_left); kfree(forw_packet); } +/** + * batadv_forw_packet_alloc - allocate a forwarding packet + * @if_incoming: The (optional) if_incoming to be grabbed + * @if_outgoing: The (optional) if_outgoing to be grabbed + * @queue_left: The (optional) queue counter to decrease + * @bat_priv: The bat_priv for the mesh of this forw_packet + * + * Allocates a forwarding packet and tries to get a reference to the + * (optional) if_incoming, if_outgoing and queue_left. If queue_left + * is NULL then bat_priv is optional, too. + * + * Return: An allocated forwarding packet on success, NULL otherwise. 
+ */ +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv) +{ + struct batadv_forw_packet *forw_packet; + const char *qname; + + if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { + qname = "unknown"; + + if (queue_left == &bat_priv->bcast_queue_left) + qname = "bcast"; + + if (queue_left == &bat_priv->batman_queue_left) + qname = "batman"; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "%s queue is full\n", qname); + + return NULL; + } + + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + if (!forw_packet) + goto err; + + if (if_incoming) + kref_get(&if_incoming->refcount); + + if (if_outgoing) + kref_get(&if_outgoing->refcount); + + forw_packet->skb = NULL; + forw_packet->queue_left = queue_left; + forw_packet->if_incoming = if_incoming; + forw_packet->if_outgoing = if_outgoing; + forw_packet->num_packets = 0; + + return forw_packet; + +err: + if (queue_left) + atomic_inc(queue_left); + + return NULL; +} + static void _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, @@ -487,24 +567,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet; struct sk_buff *newskb; - if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bcast packet queue full\n"); - goto out; - } - primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_and_inc; - - forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + goto err; + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, + &bat_priv->bcast_queue_left, + bat_priv); + batadv_hardif_put(primary_if); if (!forw_packet) - goto out_and_inc; + goto err; newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) - goto packet_free; + goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; @@ -513,11 +589,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, skb_reset_mac_header(newskb); forw_packet->skb = newskb; - forw_packet->if_incoming = primary_if; - forw_packet->if_outgoing = NULL; - - /* how often did we send the bcast packet ? 
*/ - forw_packet->num_packets = 0; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -525,13 +596,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; -packet_free: - kfree(forw_packet); -out_and_inc: - atomic_inc(&bat_priv->bcast_queue_left); -out: - if (primary_if) - batadv_hardif_put(primary_if); +err_packet_free: + batadv_forw_packet_free(forw_packet); +err: return NETDEV_TX_BUSY; } @@ -592,7 +659,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) out: batadv_forw_packet_free(forw_packet); - atomic_inc(&bat_priv->bcast_queue_left); } void @@ -633,9 +699,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->bcast_queue_left); - batadv_forw_packet_free(forw_packet); } } @@ -663,9 +726,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 7cecb7563b45..999f78683d9e 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -28,6 +28,12 @@ struct sk_buff; void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv); + int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7527c0652dd5..49e16b6e0ba3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -39,6 +39,7 @@ #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> @@ -46,7 +47,6 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> -#include <linux/workqueue.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -57,6 +57,7 @@ #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" +#include "originator.h" #include "packet.h" #include "send.h" #include "sysfs.h" @@ -377,6 +378,8 @@ dropped: dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: + if (mcast_single_orig) + batadv_orig_node_put(mcast_single_orig); if (primary_if) batadv_hardif_put(primary_if); return NETDEV_TX_OK; @@ -591,6 +594,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) } spin_lock_bh(&bat_priv->softif_vlan_list_lock); + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); @@ -601,6 +605,9 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); + /* don't return reference to new softif_vlan */ + batadv_softif_vlan_put(vlan); + return 0; } @@ -747,34 +754,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_destroy_finish - cleans up the remains of a softif - * @work: work queue item - * - * Free the parts of the 
soft interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_softif_destroy_finish(struct work_struct *work) -{ - struct batadv_softif_vlan *vlan; - struct batadv_priv *bat_priv; - struct net_device *soft_iface; - - bat_priv = container_of(work, struct batadv_priv, - cleanup_work); - soft_iface = bat_priv->soft_iface; - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdev(soft_iface); -} - -/** * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify * @@ -791,7 +770,6 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; - INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -1028,8 +1006,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_softif_vlan *vlan; - queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); + ASSERT_RTNL(); + + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice(soft_iface); } /** diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe9ca94ddee2..02d96f224c60 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -37,6 +37,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/stringify.h> +#include <linux/workqueue.h> #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -428,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; + /* GW mode is not available if the routing algorithm in use does not + * implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", @@ -455,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, char *curr_gw_mode_str; int gw_mode_tmp = -1; + /* toggling GW mode is allowed only if the routing algorithm in use + * provides the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -514,6 +529,50 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return count; } +static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* GW selection class is not available if the routing algorithm in use + * does not implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + + if (bat_priv->algo_ops->gw.show_sel_class) + return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); + + return sprintf(buff, 
"%i\n", atomic_read(&bat_priv->gw.sel_class)); +} + +static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (bat_priv->algo_ops->gw.store_sel_class) + return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, + count); + + return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface); +} + static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -625,8 +684,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, - BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); +static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class, + batadv_store_gw_sel_class); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_MCAST @@ -712,6 +771,8 @@ rem_attr: for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; out: @@ -726,6 +787,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev) for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; } @@ -781,6 +844,10 @@ rem_attr: for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; out: @@ -800,6 +867,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; } @@ -828,31 +899,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, return length; } -static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) +/** + * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @ifname: name of soft-interface to modify + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). 
+ *
+ * Return: 0 on success, < 0 on failure
+ */
+static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
+					  char ifname[IFNAMSIZ])
 {
-	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct net *net = dev_net(net_dev);
 	struct batadv_hard_iface *hard_iface;
-	int status_tmp = -1;
-	int ret = count;
+	int status_tmp;
+	int ret = 0;
+
+	ASSERT_RTNL();
 
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
-		return count;
-
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if (strlen(buff) >= IFNAMSIZ) {
-		pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
-		       buff);
-		batadv_hardif_put(hard_iface);
-		return -EINVAL;
-	}
+		return 0;
 
-	if (strncmp(buff, "none", 4) == 0)
+	if (strncmp(ifname, "none", 4) == 0)
 		status_tmp = BATADV_IF_NOT_IN_USE;
 	else
 		status_tmp = BATADV_IF_I_WANT_YOU;
@@ -861,15 +932,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
 		goto out;
 
 	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+	    (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
 		goto out;
 
-	rtnl_lock();
-
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
-		goto unlock;
+		goto out;
 	}
 
 	/* if the interface already is in use */
@@ -877,15 +946,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
 
-	ret = batadv_hardif_enable_interface(hard_iface, net, buff);
-
-unlock:
-	rtnl_unlock();
+	ret = batadv_hardif_enable_interface(hard_iface, net, ifname);
 
 out:
 	batadv_hardif_put(hard_iface);
 	return ret;
 }
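batadv_store_mesh_iface_finish() above now runs with rtnl_lock already held (hence the ASSERT_RTNL() and the dropped rtnl_lock()/rtnl_unlock() pair), while the sysfs store handler further below only validates and copies its input and defers the rest to a work item, so the write never blocks on rtnl_lock from sysfs context. The general shape of such a sysfs-to-workqueue deferral, with hypothetical my_* names, is roughly:

/* Sketch of a sysfs-store-to-workqueue handoff: the store callback only
 * copies the parsed input and queues this item; the rtnl-locked part
 * runs later in process context. All my_* names are illustrative.
 */
struct my_store_work {
	struct work_struct work;
	struct net_device *net_dev;
	char ifname[IFNAMSIZ];
};

static void my_store_work_fn(struct work_struct *work)
{
	struct my_store_work *w = container_of(work, struct my_store_work,
					       work);

	rtnl_lock();
	my_apply_ifname(w->net_dev, w->ifname);	/* the rtnl-locked step */
	rtnl_unlock();

	dev_put(w->net_dev);	/* reference taken when the work was queued */
	kfree(w);
}

The store side, visible right after this, fills such an item, takes dev_hold() on the netdev, queues the work and returns count immediately.

+/**
+ * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * @work: work queue item
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).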
+ */ +static void batadv_store_mesh_iface_work(struct work_struct *work) +{ + struct batadv_store_mesh_work *store_work; + int ret; + + store_work = container_of(work, struct batadv_store_mesh_work, work); + + rtnl_lock(); + ret = batadv_store_mesh_iface_finish(store_work->net_dev, + store_work->soft_iface_name); + rtnl_unlock(); + + if (ret < 0) + pr_err("Failed to store new mesh_iface state %s for %s: %d\n", + store_work->soft_iface_name, store_work->net_dev->name, + ret); + + dev_put(store_work->net_dev); + kfree(store_work); +} + +static ssize_t batadv_store_mesh_iface(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_store_mesh_work *store_work; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", + buff); + return -EINVAL; + } + + store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); + if (!store_work) + return -ENOMEM; + + dev_hold(net_dev); + INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); + store_work->net_dev = net_dev; + strlcpy(store_work->soft_iface_name, buff, + sizeof(store_work->soft_iface_name)); + + queue_work(batadv_event_workqueue, &store_work->work); + + return count; +} + static ssize_t batadv_show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -1048,6 +1173,8 @@ out: void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { + kobject_uevent(*hardif_obj, KOBJ_REMOVE); + kobject_del(*hardif_obj); kobject_put(*hardif_obj); *hardif_obj = NULL; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7e6df7a4964a..7f663092f6de 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,12 +22,14 @@ #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> +#include <linux/cache.h> #include <linux/compiler.h> #include <linux/crc32c.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> +#include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> @@ -35,25 +37,39 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" #include "tvlv.h" +static struct kmem_cache *batadv_tl_cache __read_mostly; +static struct kmem_cache *batadv_tg_cache __read_mostly; +static struct kmem_cache *batadv_tt_orig_cache __read_mostly; +static struct kmem_cache *batadv_tt_change_cache __read_mostly; +static struct kmem_cache *batadv_tt_req_cache __read_mostly; +static struct kmem_cache *batadv_tt_roam_cache __read_mostly; + /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; @@ -205,6 +221,20 @@ 
batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** + * batadv_tt_local_entry_free_rcu - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + +/** * batadv_tt_local_entry_release - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -218,7 +248,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -234,6 +264,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) } /** + * batadv_tt_global_entry_free_rcu - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + +/** * batadv_tt_global_entry_release - release tt_global_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -246,7 +290,8 @@ static void batadv_tt_global_entry_release(struct kref *ref) common.refcount); batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -384,6 +429,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, } /** + * batadv_tt_orig_list_entry_free_rcu - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + +/** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry @@ -396,7 +454,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** @@ -426,7 +484,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, bool event_removed = false; bool del_op_requested, del_op_entry; - tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) return; @@ -467,8 +525,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, continue; del: list_del(&entry->list); - kfree(entry); - kfree(tt_change_node); + kmem_cache_free(batadv_tt_change_cache, entry); + kmem_cache_free(batadv_tt_change_cache, tt_change_node); event_removed = true; goto unlock; } @@ -646,7 +704,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; } - tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC); + tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC); if (!tt_local) goto out; @@ -656,7 +714,7 @@ bool batadv_tt_local_add(struct net_device 
*soft_iface, const u8 *addr, net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, BATADV_PRINT_VID(vid)); - kfree(tt_local); + kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; } @@ -676,7 +734,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; kref_init(&tt_local->common.refcount); - kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; tt_local->vlan = vlan; @@ -688,6 +745,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; + kref_get(&tt_local->common.refcount); hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local->common, &tt_local->common.hash_entry); @@ -959,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) tt_diff_entries_count++; } list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } spin_unlock_bh(&bat_priv->tt.changes_list_lock); @@ -989,6 +1047,7 @@ container_register: kfree(tt_data); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1056,6 +1115,165 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_tt_local_dump_entry - Dump one TT local entry into a message + * @msg :Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common) +{ + void *hdr; + struct batadv_softif_vlan *vlan; + struct batadv_tt_local_entry *local; + unsigned int last_seen_msecs; + u32 crc; + + local = container_of(common, struct batadv_tt_local_entry, common); + last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); + + vlan = batadv_softif_vlan_get(bat_priv, common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_softif_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_LOCAL); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) && + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the local tt entries + * @idx_s: Number of entries to skip + * + * Return: Error code, or 0 on 
success + */ +static int +batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv, + common)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + return 0; +} + +/** + * batadv_tt_local_dump - Dump TT local entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or 0 on success + */ +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.local_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, head, &idx)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + + return ret; +} static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, @@ -1259,7 +1477,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } atomic_set(&bat_priv->tt.local_changes, 0); @@ -1341,7 +1559,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, goto out; } - orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); + orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); if (!orig_entry) goto out; @@ -1351,9 +1569,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; kref_init(&orig_entry->refcount); - kref_get(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); + kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); spin_unlock_bh(&tt_global->list_lock); @@ -1411,7 +1629,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, goto out; if (!tt_global_entry) { - tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC); + tt_global_entry = kmem_cache_zalloc(batadv_tg_cache, + GFP_ATOMIC); if (!tt_global_entry) goto out; @@ -1428,13 +1647,13 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; kref_init(&common->refcount); - kref_get(&common->refcount); common->added_at = 
jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); atomic_set(&tt_global_entry->orig_list_count, 0); spin_lock_init(&tt_global_entry->list_lock); + kref_get(&common->refcount); hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, common, @@ -1579,6 +1798,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, return best_entry; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_tt_global_print_entry - print all orig nodes who announce the address * for this global entry @@ -1702,6 +1922,219 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_tt_global_dump_subentry - Dump all TT local entries into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @common: tt local & tt global common data + * @orig: Originator node announcing a non-mesh client + * @best: Is the best originator for the TT entry + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_tt_common_entry *common, + struct batadv_tt_orig_list_entry *orig, + bool best) +{ + void *hdr; + struct batadv_orig_node_vlan *vlan; + u8 last_ttvn; + u32 crc; + + vlan = batadv_orig_node_vlan_get(orig->orig_node, + common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_orig_node_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_GLOBAL); + if (!hdr) + return -ENOBUFS; + + last_ttvn = atomic_read(&orig->orig_node->last_ttvn); + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) || + nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_global_dump_entry - Dump one TT global entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * @sub_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). 
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_tt_common_entry *common, int *sub_s)
+{
+	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+	struct batadv_tt_global_entry *global;
+	struct hlist_head *head;
+	int sub = 0;
+	bool best;
+
+	global = container_of(common, struct batadv_tt_global_entry, common);
+	best_entry = batadv_transtable_best_orig(bat_priv, global);
+	head = &global->orig_list;
+
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (orig_entry == best_entry);
+
+		if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+						   orig_entry, best)) {
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT global bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of sub-entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_priv *bat_priv,
+			     struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+						common, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump - Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.global_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_global_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq, bat_priv,
+						 head, &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+
+	return ret;
+}
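batadv_tt_global_dump() above uses the standard netlink dump continuation scheme: the current hash bucket, the index inside the bucket and the per-entry sub-entry index are parked in cb->args[] whenever the message buffer fills up, so the next dump pass resumes exactly where -EMSGSIZE stopped it. Stripped to its skeleton (my_* names are illustrative, not batman-adv symbols):

/* Skeleton of a resumable netlink dump: cb->args[] survives between the
 * successive passes of one dump request, so a partially emitted table
 * walk can continue at the saved bucket/index.
 */
static int my_table_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
	int bucket = cb->args[0];
	int idx = cb->args[1];

	while (bucket < my_hash->size) {
		/* stops early when msg is full, leaving idx pointing at
		 * the first entry that did not fit
		 */
		if (my_dump_bucket(msg, &my_hash->table[bucket], &idx))
			break;

		bucket++;
	}

	cb->args[0] = bucket;
	cb->args[1] = idx;

	return msg->len;	/* > 0: dump again; 0 ends the dump */
}

batadv_tt_local_dump() earlier follows the same two-level scheme, just without the sub-entry index.

 /**
  * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
@@ -2280,7 +2713,7 @@ static void batadv_tt_req_node_release(struct kref *ref)
 	tt_req_node =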
container_of(ref, struct batadv_tt_req_node, refcount); - kfree(tt_req_node); + kmem_cache_free(batadv_tt_req_cache, tt_req_node); } /** @@ -2367,7 +2800,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, goto unlock; } - tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC); if (!tt_req_node) goto unlock; @@ -3104,7 +3537,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); @@ -3121,7 +3554,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) continue; list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } @@ -3162,7 +3595,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) } if (!ret) { - tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache, + GFP_ATOMIC); if (!tt_roam_node) goto unlock; @@ -3865,3 +4299,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, return ret; } + +/** + * batadv_tt_cache_init - Initialize tt memory object cache + * + * Return: 0 on success or negative error number in case of failure. + */ +int __init batadv_tt_cache_init(void) +{ + size_t tl_size = sizeof(struct batadv_tt_local_entry); + size_t tg_size = sizeof(struct batadv_tt_global_entry); + size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry); + size_t tt_change_size = sizeof(struct batadv_tt_change_node); + size_t tt_req_size = sizeof(struct batadv_tt_req_node); + size_t tt_roam_size = sizeof(struct batadv_tt_roam_node); + + batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tl_cache) + return -ENOMEM; + + batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tg_cache) + goto err_tt_tl_destroy; + + batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache", + tt_orig_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_orig_cache) + goto err_tt_tg_destroy; + + batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache", + tt_change_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_change_cache) + goto err_tt_orig_destroy; + + batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache", + tt_req_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_req_cache) + goto err_tt_change_destroy; + + batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache", + tt_roam_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_roam_cache) + goto err_tt_req_destroy; + + return 0; + +err_tt_req_destroy: + kmem_cache_destroy(batadv_tt_req_cache); + batadv_tt_req_cache = NULL; +err_tt_change_destroy: + kmem_cache_destroy(batadv_tt_change_cache); + batadv_tt_change_cache = NULL; +err_tt_orig_destroy: + kmem_cache_destroy(batadv_tt_orig_cache); + batadv_tt_orig_cache = NULL; +err_tt_tg_destroy: + kmem_cache_destroy(batadv_tg_cache); + batadv_tg_cache = NULL; +err_tt_tl_destroy: + kmem_cache_destroy(batadv_tl_cache); + batadv_tl_cache = NULL; + + return -ENOMEM; +} + +/** + * batadv_tt_cache_destroy - Destroy tt memory object cache + */ +void batadv_tt_cache_destroy(void) +{ + kmem_cache_destroy(batadv_tl_cache); + kmem_cache_destroy(batadv_tg_cache); + 
kmem_cache_destroy(batadv_tt_orig_cache); + kmem_cache_destroy(batadv_tt_change_cache); + kmem_cache_destroy(batadv_tt_req_cache); + kmem_cache_destroy(batadv_tt_roam_cache); +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 7c7e2c006bfe..783fdba84db2 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,10 @@ #include <linux/types.h> +struct netlink_callback; struct net_device; struct seq_file; +struct sk_buff; int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, @@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb); +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message); @@ -59,4 +63,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); +int batadv_tt_cache_init(void); +void batadv_tt_cache_destroy(void); + #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 3d1cf0fb112d..77654f055f24 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -257,8 +257,13 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); batadv_tvlv_container_remove(bat_priv, tvlv_old); + + kref_get(&tvlv_new->refcount); hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + + /* don't return reference to new tvlv_container */ + batadv_tvlv_container_put(tvlv_new); } /** @@ -542,8 +547,12 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, INIT_HLIST_NODE(&tvlv_handler->list); spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + + /* don't return reference to new tvlv_handler */ + batadv_tvlv_handler_put(tvlv_handler); } /** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a64522c3b45d..b3dd1a381aad 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,6 +28,7 @@ #include <linux/if_ether.h> #include <linux/kref.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/sched.h> /* for linux/wait.h */ #include <linux/spinlock.h> #include <linux/types.h> @@ -132,7 +133,6 @@ struct batadv_hard_iface_bat_v { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: per hard-interface B.A.T.M.A.N. IV data * @bat_v: per hard-interface B.A.T.M.A.N. 
V data - * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -152,7 +152,6 @@ struct batadv_hard_iface { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hard_iface_bat_v bat_v; #endif - struct work_struct cleanup_work; struct dentry *debug_dir; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list */ @@ -1015,7 +1014,6 @@ struct batadv_priv_bat_v { * @forw_bcast_list_lock: lock protecting forw_bcast_list * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @algo_ops: routing algorithm used by this mesh interface @@ -1074,7 +1072,6 @@ struct batadv_priv { spinlock_t tp_list_lock; /* protects tp_list */ atomic_t tp_num; struct delayed_work orig_work; - struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; @@ -1379,6 +1376,7 @@ struct batadv_skb_cb { * locally generated packet * @if_outgoing: packet where the packet should be sent to, or NULL if * unspecified + * @queue_left: The queue (counter) this packet was applied to */ struct batadv_forw_packet { struct hlist_node list; @@ -1391,11 +1389,13 @@ struct batadv_forw_packet { struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; struct batadv_hard_iface *if_outgoing; + atomic_t *queue_left; }; /** * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) * @activate: start routing mechanisms when hard-interface is brought up + * (optional) * @enable: init routing info when hard-interface is enabled * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information @@ -1413,11 +1413,13 @@ struct batadv_algo_iface_ops { /** * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) * @hardif_init: called on creation of single hop entry + * (optional) * @cmp: compare the metrics of two neighbors for their respective outgoing * interfaces * @is_similar_or_better: check if neigh1 is equally similar or better than * neigh2 for their respective outgoing interface from the metric prospective * @print: print the single hop neighbor list (optional) + * @dump: dump neighbors to a netlink socket (optional) */ struct batadv_algo_neigh_ops { void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); @@ -1429,26 +1431,64 @@ struct batadv_algo_neigh_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); }; /** * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) * @free: free the resources allocated by the routing algorithm for an orig_node - * object + * object (optional) * @add_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to a new hard-interface being added into the mesh + * orig_node due to a new 
hard-interface being added into the mesh (optional) * @del_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to an hard-interface being removed from the mesh + * orig_node due to an hard-interface being removed from the mesh (optional) * @print: print the originator table (optional) + * @dump: dump originators to a netlink socket (optional) */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); +}; + +/** + * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @store_sel_class: parse and stores a new GW selection class (optional) + * @show_sel_class: prints the current GW selection class (optional) + * @get_best_gw_node: select the best GW from the list of available nodes + * (optional) + * @is_eligible: check if a newly discovered GW is a potential candidate for + * the election as best GW (optional) + * @print: print the gateway table (optional) + * @dump: dump gateways to a netlink socket (optional) + */ +struct batadv_algo_gw_ops { + ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, + size_t count); + ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); + struct batadv_gw_node *(*get_best_gw_node) + (struct batadv_priv *bat_priv); + bool (*is_eligible)(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv); }; /** @@ -1458,6 +1498,7 @@ struct batadv_algo_orig_ops { * @iface: callbacks related to interface handling * @neigh: callbacks related to neighbors handling * @orig: callbacks related to originators handling + * @gw: callbacks related to GW mode */ struct batadv_algo_ops { struct hlist_node list; @@ -1465,6 +1506,7 @@ struct batadv_algo_ops { struct batadv_algo_iface_ops iface; struct batadv_algo_neigh_ops neigh; struct batadv_algo_orig_ops orig; + struct batadv_algo_gw_ops gw; }; /** @@ -1564,4 +1606,17 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; +/** + * struct batadv_store_mesh_work - Work queue item to detach add/del interface + * from sysfs locks + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @soft_iface_name: name of soft-interface to modify + * @work: work queue item + */ +struct batadv_store_mesh_work { + struct net_device *net_dev; + char soft_iface_name[IFNAMSIZ]; + struct work_struct work; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..32a02de39cd2 100644 --- 
a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..1da3221845f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e486203d133..3132cfc80e9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f2a29e467e78..190a5bc00f4a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1245,14 +1245,30 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } -static size_t bridge_get_linkxstats_size(const struct net_device *dev) +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) { - struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; + struct net_bridge *br; int numvls = 0; - vg = br_vlan_group(br); + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + br = netdev_priv(dev); + vg = br_vlan_group(br); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); + break; + default: + return 0; + } + if (vg) { /* we need to count all, even placeholder entries */ list_for_each_entry(v, &vg->vlan_list, vlist) @@ -1264,45 +1280,42 @@ static size_t bridge_get_linkxstats_size(const struct net_device *dev) nla_total_size(0); } -static size_t brport_get_linkxstats_size(const struct net_device *dev) -{ - return nla_total_size(sizeof(struct br_mcast_stats)) + - nla_total_size(0); -} - -static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) +static int br_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr) { - size_t retsize = 0; + struct nlattr *nla __maybe_unused; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct 
net_bridge *br; + struct nlattr *nest; + int vl_idx = 0; switch (attr) { case IFLA_STATS_LINK_XSTATS: - retsize = bridge_get_linkxstats_size(dev); + br = netdev_priv(dev); + vg = br_vlan_group(br); break; case IFLA_STATS_LINK_XSTATS_SLAVE: - retsize = brport_get_linkxstats_size(dev); + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); break; + default: + return -EINVAL; } - return retsize; -} - -static int bridge_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge *br = netdev_priv(dev); - struct nlattr *nla __maybe_unused; - struct net_bridge_vlan_group *vg; - struct net_bridge_vlan *v; - struct nlattr *nest; - int vl_idx = 0; - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; - vg = br_vlan_group(br); if (vg) { + u16 pvid; + + pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; @@ -1311,6 +1324,9 @@ static int bridge_fill_linkxstats(struct sk_buff *skb, continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; + vxi.flags = v->flags; + if (v->vid == pvid) + vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); vxi.rx_bytes = stats.rx_bytes; vxi.rx_packets = stats.rx_packets; @@ -1329,7 +1345,7 @@ static int bridge_fill_linkxstats(struct sk_buff *skb, BRIDGE_XSTATS_PAD); if (!nla) goto nla_put_failure; - br_multicast_get_stats(br, NULL, nla_data(nla)); + br_multicast_get_stats(br, p, nla_data(nla)); } #endif nla_nest_end(skb, nest); @@ -1344,52 +1360,6 @@ nla_put_failure: return -EMSGSIZE; } -static int brport_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge_port *p = br_port_get_rtnl(dev); - struct nlattr *nla __maybe_unused; - struct nlattr *nest; - - if (!p) - return 0; - - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); - if (!nest) - return -EMSGSIZE; -#ifdef CONFIG_BRIDGE_IGMP_SNOOPING - nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, - sizeof(struct br_mcast_stats), - BRIDGE_XSTATS_PAD); - if (!nla) { - nla_nest_end(skb, nest); - return -EMSGSIZE; - } - br_multicast_get_stats(p->br, p, nla_data(nla)); -#endif - nla_nest_end(skb, nest); - - return 0; -} - -static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, - int *prividx, int attr) -{ - int ret = -EINVAL; - - switch (attr) { - case IFLA_STATS_LINK_XSTATS: - ret = bridge_fill_linkxstats(skb, dev, prividx); - break; - case IFLA_STATS_LINK_XSTATS_SLAVE: - ret = brport_fill_linkxstats(skb, dev, prividx); - break; - } - - return ret; -} - static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size_filtered, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..2379b2b865c9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif 
+ +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <net/switchdev.h> + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/core/dev.c b/net/core/dev.c index dd6ce598de89..1d5c6dda1988 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); @@ -4292,15 +4282,25 @@ int netif_receive_skb(struct sk_buff *skb) } EXPORT_SYMBOL(netif_receive_skb); -/* Network device is going away, flush any packets still pending - * Called with irqs disabled. 
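br_switchdev_mark_get() above hands every bridge port that reports the same switchdev parent ID (i.e. sits on the same physical switch) one shared offload_fwd_mark, and allocates a fresh mark for the first port of a previously unseen switch. The forwarding-side consequence can be summarized as follows (simplified sketch of the ingress/egress pair, using the fields introduced in this patch):

	/* Would software egress to this port duplicate what the ASIC
	 * already forwarded? Simplified restatement, not the patch code. */
	static bool br_sw_already_forwarded(const struct sk_buff *skb,
					    const struct net_bridge_port *p)
	{
		return skb->offload_fwd_mark &&
		       BR_INPUT_SKB_CB(skb)->offload_fwd_mark ==
		       p->offload_fwd_mark;
	}

This is also why the generic check in __dev_queue_xmit() can be deleted in the dev.c hunk that follows: the decision moves from the device layer into the bridge, scoped per port group rather than per device.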
- */ -static void flush_backlog(void *arg) +struct flush_work { + struct net_device *dev; + struct work_struct work; +}; + +DEFINE_PER_CPU(struct flush_work, flush_works); + +/* Network device is going away, flush any packets still pending */ +static void flush_backlog(struct work_struct *work) { - struct net_device *dev = arg; - struct softnet_data *sd = this_cpu_ptr(&softnet_data); + struct flush_work *flush = container_of(work, typeof(*flush), work); + struct net_device *dev = flush->dev; struct sk_buff *skb, *tmp; + struct softnet_data *sd; + local_bh_disable(); + sd = this_cpu_ptr(&softnet_data); + + local_irq_disable(); rps_lock(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev == dev) { @@ -4310,6 +4310,7 @@ static void flush_backlog(void *arg) } } rps_unlock(sd); + local_irq_enable(); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev == dev) { @@ -4318,6 +4319,27 @@ static void flush_backlog(void *arg) input_queue_head_incr(sd); } } + local_bh_enable(); +} + +static void flush_all_backlogs(struct net_device *dev) +{ + unsigned int cpu; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + struct flush_work *flush = per_cpu_ptr(&flush_works, cpu); + + INIT_WORK(&flush->work, flush_backlog); + flush->dev = dev; + queue_work_on(cpu, system_highpri_wq, &flush->work); + } + + for_each_online_cpu(cpu) + flush_work(&per_cpu_ptr(&flush_works, cpu)->work); + + put_online_cpus(); } static int napi_gro_complete(struct sk_buff *skb) @@ -4805,8 +4827,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) static int process_backlog(struct napi_struct *napi, int quota) { - int work = 0; struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); + bool again = true; + int work = 0; /* Check if we have pending ipi, its better to send them now, * not waiting net_rx_action() end. @@ -4817,23 +4840,20 @@ static int process_backlog(struct napi_struct *napi, int quota) } napi->weight = weight_p; - local_irq_disable(); - while (1) { + while (again) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { rcu_read_lock(); - local_irq_enable(); __netif_receive_skb(skb); rcu_read_unlock(); - local_irq_disable(); input_queue_head_incr(sd); - if (++work >= quota) { - local_irq_enable(); + if (++work >= quota) return work; - } + } + local_irq_disable(); rps_lock(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { /* @@ -4845,16 +4865,14 @@ static int process_backlog(struct napi_struct *napi, int quota) * and we dont need an smp_mb() memory barrier. 
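flush_all_backlogs() above replaces the old on_each_cpu() IPI, which ran flush_backlog() with interrupts disabled, with one work item per online CPU, so the flush now runs in process context and disables interrupts only around the actual queue walks. The queue-everything-then-join pattern is generic and worth spelling out (illustrative body; usual workqueue/percpu headers assumed):

	static DEFINE_PER_CPU(struct work_struct, demo_works);

	static void demo_work_fn(struct work_struct *work)
	{
		/* per-CPU cleanup runs here, in process context */
	}

	static void demo_run_on_each_cpu(void)
	{
		unsigned int cpu;

		get_online_cpus();
		/* queue first so the CPUs make progress in parallel... */
		for_each_online_cpu(cpu) {
			struct work_struct *w = per_cpu_ptr(&demo_works, cpu);

			INIT_WORK(w, demo_work_fn);
			queue_work_on(cpu, system_highpri_wq, w);
		}
		/* ...then join them all */
		for_each_online_cpu(cpu)
			flush_work(per_cpu_ptr(&demo_works, cpu));
		put_online_cpus();
	}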
*/ napi->state = 0; - rps_unlock(sd); - - break; + again = false; + } else { + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); } - - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); rps_unlock(sd); + local_irq_enable(); } - local_irq_enable(); return work; } @@ -6707,7 +6725,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; - on_each_cpu(flush_backlog, dev, 1); + flush_all_backlogs(dev); } synchronize_net(); @@ -7625,6 +7643,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); +#ifdef CONFIG_NET_SCHED + hash_init(dev->qdisc_hash); +#endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/core/filter.c b/net/core/filter.c index cb06aceb512a..a83766be1ad2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1350,14 +1350,18 @@ struct bpf_scratchpad { static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +static inline int __bpf_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_ensure_writable(skb, write_len); +} + static inline int bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { - int err; + int err = __bpf_try_make_writable(skb, write_len); - err = skb_ensure_writable(skb, write_len); bpf_compute_data_end(skb); - return err; } @@ -1976,8 +1980,8 @@ static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) u32 pkt_type = r2; /* We only allow a restricted subset to be changed for now. */ - if (unlikely(skb->pkt_type > PACKET_OTHERHOST || - pkt_type > PACKET_OTHERHOST)) + if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || + !skb_pkt_type_ok(pkt_type))) return -EINVAL; skb->pkt_type = pkt_type; @@ -1992,6 +1996,92 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { .arg2_type = ARG_ANYTHING, }; +static u32 __bpf_skb_min_len(const struct sk_buff *skb) +{ + u32 min_len = skb_network_offset(skb); + + if (skb_transport_header_was_set(skb)) + min_len = skb_transport_offset(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) + min_len = skb_checksum_start_offset(skb) + + skb->csum_offset + sizeof(__sum16); + return min_len; +} + +static u32 __bpf_skb_max_len(const struct sk_buff *skb) +{ + return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : + 65536; +} + +static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + unsigned int old_len = skb->len; + int ret; + + ret = __skb_grow_rcsum(skb, new_len); + if (!ret) + memset(skb->data + old_len, 0, new_len - old_len); + return ret; +} + +static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + return __skb_trim_rcsum(skb, new_len); +} + +static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) r1; + u32 max_len = __bpf_skb_max_len(skb); + u32 min_len = __bpf_skb_min_len(skb); + u32 new_len = (u32) r2; + int ret; + + if (unlikely(flags || new_len > max_len || new_len < min_len)) + return -EINVAL; + if (skb->encapsulation) + return -ENOTSUPP; + + /* The basic idea of this helper is that it's performing the + * needed work to either grow or trim an skb, and eBPF program + * rewrites the rest via helpers like bpf_skb_store_bytes(), + * bpf_lX_csum_replace() and others rather than passing a raw + * buffer here. 
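From the program side, the new bpf_skb_change_tail() helper is invoked roughly like this (hypothetical tc classifier; SEC() and the helper declaration come from the usual BPF sample scaffolding, and the 64-byte target size is arbitrary):

	#include <linux/bpf.h>
	#include <linux/pkt_cls.h>

	SEC("classifier")
	int trim_reply(struct __sk_buff *skb)
	{
		/* Shrink the packet to a fixed-size control reply; the
		 * program is expected to rewrite the payload afterwards
		 * with bpf_skb_store_bytes() and friends. The helper
		 * fails on encapsulated skbs and out-of-range sizes. */
		if (bpf_skb_change_tail(skb, 64, 0) < 0)
			return TC_ACT_SHOT;
		return TC_ACT_OK;
	}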
This one is a slow path helper and intended + * for replies with control messages. + * + * Like in bpf_skb_change_proto(), we want to keep this rather + * minimal and without protocol specifics so that we are able + * to separate concerns as in bpf_skb_store_bytes() should only + * be the one responsible for writing buffers. + * + * It's really expected to be a slow path operation here for + * control message replies, so we're implicitly linearizing, + * uncloning and drop offloads from the skb by this. + */ + ret = __bpf_try_make_writable(skb, skb->len); + if (!ret) { + if (new_len > skb->len) + ret = bpf_skb_grow_rcsum(skb, new_len); + else if (new_len < skb->len) + ret = bpf_skb_trim_rcsum(skb, new_len); + if (!ret && skb_is_gso(skb)) + skb_gso_reset(skb); + } + + bpf_compute_data_end(skb); + return ret; +} + +static const struct bpf_func_proto bpf_skb_change_tail_proto = { + .func = bpf_skb_change_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_skb_data(void *func) { if (func == bpf_skb_vlan_push) @@ -2002,6 +2092,8 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_change_proto) return true; + if (func == bpf_skb_change_tail) + return true; if (func == bpf_l3_csum_replace) return true; if (func == bpf_l4_csum_replace) @@ -2282,7 +2374,6 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -#ifdef CONFIG_SOCK_CGROUP_DATA static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long)r1; @@ -2303,7 +2394,7 @@ static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) if (unlikely(!cgrp)) return -EAGAIN; - return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); + return sk_under_cgroup_hierarchy(sk, cgrp); } static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { @@ -2314,7 +2405,41 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; -#endif + +static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, + unsigned long off, unsigned long len) +{ + memcpy(dst_buff, src_buff + off, len); + return 0; +} + +static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4, + u64 meta_size) +{ + struct xdp_buff *xdp = (struct xdp_buff *)(long) r1; + struct bpf_map *map = (struct bpf_map *)(long) r2; + u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; + void *meta = (void *)(long) r4; + + if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) + return -EINVAL; + if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + return -EFAULT; + + return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, + bpf_xdp_copy); +} + +static const struct bpf_func_proto bpf_xdp_event_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) @@ -2368,6 +2493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case 
BPF_FUNC_skb_set_tunnel_key: @@ -2386,10 +2513,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; -#ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; -#endif default: return sk_filter_func_proto(func_id); } @@ -2398,7 +2523,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) static const struct bpf_func_proto * xdp_func_proto(enum bpf_func_id func_id) { - return sk_filter_func_proto(func_id); + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_xdp_event_output_proto; + default: + return sk_filter_func_proto(func_id); + } } static bool __is_valid_access(int off, int size, enum bpf_access_type type) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 61ad43f61c5e..a2879c0f6c4c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/gre.h> +#include <net/pptp.h> #include <linux/igmp.h> #include <linux/icmp.h> #include <linux/sctp.h> @@ -116,13 +118,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; + bool skip_vlan = false; u8 ip_proto = 0; bool ret = false; if (!data) { data = skb->data; - proto = skb->protocol; + proto = skb_vlan_tag_present(skb) ? + skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); } @@ -241,23 +246,45 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; - struct vlan_hdr _vlan; - vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); - if (!vlan) - goto out_bad; + if (skb_vlan_tag_present(skb)) + proto = skb->protocol; + + if (!skb_vlan_tag_present(skb) || + proto == cpu_to_be16(ETH_P_8021Q) || + proto == cpu_to_be16(ETH_P_8021AD)) { + struct vlan_hdr _vlan; + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), + data, hlen, &_vlan); + if (!vlan) + goto out_bad; + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + if (skip_vlan) + goto again; + } + + skip_vlan = true; if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID)) { - key_tags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID, + FLOW_DISSECTOR_KEY_VLAN)) { + key_vlan = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_VLAN, target_container); - key_tags->vlan_id = skb_vlan_tag_get_id(skb); + if (skb_vlan_tag_present(skb)) { + key_vlan->vlan_id = skb_vlan_tag_get_id(skb); + key_vlan->vlan_priority = + (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); + } else { + key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & + VLAN_VID_MASK; + key_vlan->vlan_priority = + (ntohs(vlan->h_vlan_TCI) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } } - proto = vlan->h_vlan_encapsulated_proto; - nhoff += sizeof(*vlan); goto again; } case htons(ETH_P_PPP_SES): { @@ -338,32 +365,42 @@ mpls: ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: { - struct gre_hdr { - __be16 flags; - __be16 proto; - } *hdr, _hdr; + struct gre_base_hdr *hdr, _hdr; + u16 gre_ver; + int offset = 0; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) goto out_bad; - /* - * Only look inside GRE if version zero and no - * routing - */ - if 
(hdr->flags & (GRE_VERSION | GRE_ROUTING)) + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) break; - proto = hdr->proto; - nhoff += 4; + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + break; + + proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + break; + } + + offset += sizeof(struct gre_base_hdr); + if (hdr->flags & GRE_CSUM) - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->csum) + + sizeof(((struct gre_full_hdr *)0)->reserved1); + if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; - keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), data, hlen, &_keyid); - if (!keyid) goto out_bad; @@ -372,32 +409,65 @@ ip_proto_again: key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); - key_keyid->keyid = *keyid; + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->key); } + if (hdr->flags & GRE_SEQ) - nhoff += 4; - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, nhoff, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) + offset += sizeof(((struct pptp_gre_header *)0)->seq); + + if (gre_ver == 0) { + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff + offset, + sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = (nhoff + offset); + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + + ppp_hdr = skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), _ppp_hdr); + if (!ppp_hdr) goto out_bad; - proto = eth->h_proto; - nhoff += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. 
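The offset arithmetic in this hunk tracks the fixed GRE option layout: each flag bit adds a four-byte field between the base header and the payload. A compact restatement for the version-0 case (illustrative helper, not part of the patch):

	static int gre_v0_options_len(__be16 flags)
	{
		int len = 0;

		if (flags & GRE_CSUM)
			len += 4;	/* 2-byte checksum + 2 reserved bytes */
		if (flags & GRE_KEY)
			len += 4;	/* 32-bit key */
		if (flags & GRE_SEQ)
			len += 4;	/* 32-bit sequence number */
		return len;
	}

For version 1 (PPTP) the dissector additionally skips the optional acknowledgment field and then reads the PPP header to pick the inner EtherType.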
- */ - if (NET_IP_ALIGN) - hlen = nhoff; + + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; } + nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) goto out_good; @@ -872,8 +942,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, ports), }, { - .key_id = FLOW_DISSECTOR_KEY_VLANID, - .offset = offsetof(struct flow_keys, tags), + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), }, { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cf26e04c4046..2ae929f9bd06 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } else goto out; } else { - if (lladdr == neigh->ha && new == NUD_STALE) + if (lladdr == neigh->ha && new == NUD_STALE && + !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..1fe58167d39a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -37,6 +37,8 @@ struct net init_net = { }; EXPORT_SYMBOL(init_net); +static bool init_net_initialized; + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; @@ -750,6 +752,8 @@ static int __init net_ns_init(void) if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); + init_net_initialized = true; + rtnl_lock(); list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); @@ -811,15 +815,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { + if (!init_net_initialized) { + list_add_tail(&ops->list, list); + return 0; + } + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { - LIST_HEAD(net_exit_list); - list_add(&init_net.exit_list, &net_exit_list); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + if (!init_net_initialized) { + list_del(&ops->list); + } else { + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); + } } #endif /* CONFIG_NET_NS */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 189cc78c77eb..318fc5231b2b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -704,6 +704,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) } else if (i == RTAX_FEATURES - 1) { u32 user_features = metrics[i] & RTAX_FEATURE_MASK; + if (!user_features) + continue; BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); if (nla_put_u32(skb, i + 1, user_features)) goto nla_put_failure; diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b60223..51a730485649 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) -{ - unsigned long nulls1, nulls2; - - nulls1 = offsetof(struct sock, __sk_common.skc_node.next); - nulls2 = offsetof(struct sock, 
__sk_common.skc_portaddr_node.next); - if (nulls1 > nulls2) - swap(nulls1, nulls2); - - if (nulls1 != 0) - memset((char *)sk, 0, nulls1); - memset((char *)sk + nulls1 + sizeof(void *), 0, - nulls2 - nulls1 - sizeof(void *)); - memset((char *)sk + nulls2 + sizeof(void *), 0, - size - nulls2 - sizeof(void *)); -} -EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); - static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { @@ -1344,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; - if (priority & __GFP_ZERO) { - if (prot->clear_sk) - prot->clear_sk(sk, prot->obj_size); - else - sk_prot_clear_nulls(sk, prot->obj_size); - } + if (priority & __GFP_ZERO) + sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 7e68bc6bc853..d8d267e9a872 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -61,27 +61,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_driver *drv) +void register_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); + list_add_tail(&ops->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_driver *drv) +void unregister_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); + list_del_init(&ops->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_driver * +static struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_driver *ret; + struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -90,13 +90,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; - drv = list_entry(list, struct dsa_switch_driver, list); + ops = list_entry(list, struct dsa_switch_ops, list); - name = drv->probe(parent, host_dev, sw_addr, priv); + name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { - ret = drv; + ret = ops; break; } } @@ -117,7 +117,7 @@ static ssize_t temp1_input_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp(ds, &temp); + ret = ds->ops->get_temp(ds, &temp); if (ret < 0) return ret; @@ -131,7 +131,7 @@ static ssize_t temp1_max_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp_limit(ds, &temp); + ret = ds->ops->get_temp_limit(ds, &temp); if (ret < 0) return ret; @@ -149,7 +149,7 @@ static ssize_t temp1_max_store(struct device *dev, if (ret < 0) return ret; - ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); + ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); if (ret < 0) return ret; @@ -164,7 +164,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev, bool alarm; int ret; - ret = ds->drv->get_temp_alarm(ds, &alarm); + ret = ds->ops->get_temp_alarm(ds, &alarm); if (ret < 
0) return ret; @@ -184,15 +184,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { - if (!drv->get_temp_limit) + if (!ops->get_temp_limit) mode = 0; - else if (!drv->set_temp_limit) + else if (!ops->set_temp_limit) mode &= ~S_IWUSR; - } else if (index == 2 && !drv->get_temp_alarm) { + } else if (index == 2 && !ops->get_temp_alarm) { mode = 0; } return mode; @@ -228,8 +228,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); - if (ds->drv->adjust_link) - ds->drv->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); } return 0; @@ -303,7 +303,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -354,7 +354,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol); + enum dsa_tag_protocol tag_protocol; + + tag_protocol = ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); goto out; @@ -368,15 +371,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* * Do basic register setup. */ - ret = drv->setup(ds); + ret = ops->setup(ds); if (ret < 0) goto out; - ret = drv->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) goto out; - if (!ds->slave_mii_bus && drv->phy_read) { + if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (!ds->slave_mii_bus) { ret = -ENOMEM; @@ -423,7 +426,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * register with hardware monitoring subsystem. * Treat registration error as non-fatal and ignore it. */ - if (drv->get_temp) { + if (ops->get_temp) { const char *netname = netdev_name(dst->master_netdev); char hname[IFNAMSIZ + 1]; int i, j; @@ -454,7 +457,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; @@ -463,8 +466,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. 
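With dsa_switch_driver renamed to dsa_switch_ops and the tag protocol moved behind a get_tag_protocol() callback, a legacy-probed switch driver now looks roughly like this (hypothetical driver: the names and trivial bodies are invented for illustration):

	#include <linux/module.h>
	#include <net/dsa.h>

	static enum dsa_tag_protocol foo_get_tag_protocol(struct dsa_switch *ds)
	{
		return DSA_TAG_PROTO_NONE;	/* this sketch adds no tag */
	}

	static const char *foo_probe(struct device *parent,
				     struct device *host_dev,
				     int sw_addr, void **priv)
	{
		return "foo";			/* non-NULL name == detected */
	}

	static int foo_setup(struct dsa_switch *ds)
	{
		return 0;			/* basic register setup */
	}

	static int foo_set_addr(struct dsa_switch *ds, u8 *addr)
	{
		return 0;
	}

	static struct dsa_switch_ops foo_switch_ops = {
		.get_tag_protocol = foo_get_tag_protocol,
		.probe		  = foo_probe,
		.setup		  = foo_setup,
		.set_addr	  = foo_set_addr,
	};

	static int __init foo_init(void)
	{
		register_switch_driver(&foo_switch_ops);
		return 0;
	}
	module_init(foo_init);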
*/ - drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (drv == NULL) { + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); @@ -483,7 +486,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->dst = dst; ds->index = index; ds->cd = cd; - ds->drv = drv; + ds->ops = ops; ds->priv = priv; ds->dev = parent; @@ -538,12 +541,12 @@ static void dsa_switch_destroy(struct dsa_switch *ds) ds->dsa_port_mask |= ~(1 << port); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP -static int dsa_switch_suspend(struct dsa_switch *ds) +int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -557,18 +560,19 @@ static int dsa_switch_suspend(struct dsa_switch *ds) return ret; } - if (ds->drv->suspend) - ret = ds->drv->suspend(ds); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -static int dsa_switch_resume(struct dsa_switch *ds) +int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; - if (ds->drv->resume) - ret = ds->drv->resume(ds); + if (ds->ops->resume) + ret = ds->ops->resume(ds); if (ret) return ret; @@ -585,6 +589,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) return 0; } +EXPORT_SYMBOL_GPL(dsa_switch_resume); #endif /* platform driver init and cleanup *****************************************/ @@ -1086,7 +1091,6 @@ static int dsa_resume(struct device *d) static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "brcm,bcm7445-switch-v4.0" }, { .compatible = "marvell,dsa", }, {} }; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f30bad9678f0..8278385dcd21 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before drv->setup() has run, since the switch drivers and + * driver and before ops->setup() has run, since the switch drivers and * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask = ds->enabled_port_mask; - err = ds->drv->setup(ds); + err = ds->ops->setup(ds); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - if (!ds->slave_mii_bus && ds->drv->phy_read) { + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; @@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) dsa_user_port_unapply(port, index, ds); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -443,6 +443,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -465,7 +466,8 @@ static int dsa_cpu_parse(struct 
device_node *port, u32 index, dst->cpu_port = index; } - dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol); + tag_protocol = ds->ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(dst->tag_ops); @@ -541,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) ds->ports[reg].dn = port; - /* Initialize enabled_port_mask now for drv->setup() + /* Initialize enabled_port_mask now for ops->setup() * to have access to a correct value, just like what * net/dsa/dsa.c::dsa_switch_setup_one does. */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fc9196745225..9f6c2a20f6ff 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_read(ds, addr, reg); + return ds->ops->phy_read(ds, addr, reg); return 0xffff; } @@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_write(ds, addr, reg, val); + return ds->ops->phy_write(ds, addr, reg, val); return 0; } @@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->drv->port_enable) { - err = ds->drv->port_enable(ds, p->port, p->phy); + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, p->port, p->phy); if (err) goto clear_promisc; } - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, stp_state); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->drv->port_disable) - ds->drv->port_disable(ds, p->port, p->phy); + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans); } - ds->drv->port_vlan_add(ds, p->port, vlan, trans); + ds->ops->port_vlan_add(ds, p->port, vlan, trans); return 0; } @@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (!ds->drv->port_vlan_del) + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - return ds->drv->port_vlan_del(ds, p->port, vlan); + return ds->ops->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->port, 
vlan, cb); + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, p->port, vlan, cb); return -EOPNOTSUPP; } @@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans); } - ds->drv->port_fdb_add(ds, p->port, fdb, trans); + ds->ops->port_fdb_add(ds, p->port, fdb, trans); return 0; } @@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb); + if (ds->ops->port_fdb_del) + ret = ds->ops->port_fdb_del(ds, p->port, fdb); return ret; } @@ -284,8 +284,8 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_fdb_dump) - return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, p->port, fdb, cb); return -EOPNOTSUPP; } @@ -308,9 +308,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) - return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); return 0; } @@ -326,8 +326,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->drv->port_vlan_filtering) - return ds->drv->port_vlan_filtering(ds, p->port, + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, p->port, attr->u.vlan_filtering); return 0; @@ -365,8 +365,8 @@ static int dsa_slave_ageing_time(struct net_device *dev, ds->ports[p->port].ageing_time = ageing_time; ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - if (ds->drv->set_ageing_time) - return ds->drv->set_ageing_time(ds, ageing_time); + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); return 0; } @@ -481,8 +481,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, p->bridge_dev = br; - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->port, br); + if (ds->ops->port_bridge_join) + ret = ds->ops->port_bridge_join(ds, p->port, br); return ret == -EOPNOTSUPP ? 
0 : ret; } @@ -493,16 +493,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) struct dsa_switch *ds = p->parent; - if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->port); + if (ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, p->port); p->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -605,8 +605,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs_len) - return ds->drv->get_regs_len(ds, p->port); + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, p->port); return -EOPNOTSUPP; } @@ -617,8 +617,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs) - ds->drv->get_regs(ds, p->port, regs, _p); + if (ds->ops->get_regs) + ds->ops->get_regs(ds, p->port, regs, _p); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -651,8 +651,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; - if (ds->drv->get_eeprom_len) - return ds->drv->get_eeprom_len(ds); + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); return 0; } @@ -663,8 +663,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_eeprom) - return ds->drv->get_eeprom(ds, eeprom, data); + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -675,8 +675,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->set_eeprom) - return ds->drv->set_eeprom(ds, eeprom, data); + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -694,8 +694,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + len, "tx_bytes", len); strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); - if (ds->drv->get_strings != NULL) - ds->drv->get_strings(ds, p->port, data + 4 * len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, p->port, data + 4 * len); } } @@ -714,8 +714,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); } - if (ds->drv->get_ethtool_stats) - ds->drv->get_ethtool_stats(ds, cpu_port, data + count); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, cpu_port, data + count); } static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) @@ -727,8 +727,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) if (dst->master_ethtool_ops.get_sset_count) count += dst->master_ethtool_ops.get_sset_count(dev, sset); - if (sset == ETH_SS_STATS && ds->drv->get_sset_count) - count += ds->drv->get_sset_count(ds); + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ 
-755,14 +755,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, dst->master_ethtool_ops.get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->drv->get_strings(ds, cpu_port, ndata); - count = ds->drv->get_sset_count(ds); + ds->ops->get_strings(ds, cpu_port, ndata); + count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -782,8 +782,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[1] = dev->stats.tx_bytes; data[2] = dev->stats.rx_packets; data[3] = dev->stats.rx_bytes; - if (ds->drv->get_ethtool_stats != NULL) - ds->drv->get_ethtool_stats(ds, p->port, data + 4); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, p->port, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) @@ -795,8 +795,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) int count; count = 4; - if (ds->drv->get_sset_count != NULL) - count += ds->drv->get_sset_count(ds); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -809,8 +809,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_wol) - ds->drv->get_wol(ds, p->port, w); + if (ds->ops->get_wol) + ds->ops->get_wol(ds, p->port, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) @@ -819,8 +819,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->set_wol) - ret = ds->drv->set_wol(ds, p->port, w); + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, p->port, w); return ret; } @@ -831,10 +831,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->set_eee) + if (!ds->ops->set_eee) return -EOPNOTSUPP; - ret = ds->drv->set_eee(ds, p->port, p->phy, e); + ret = ds->ops->set_eee(ds, p->port, p->phy, e); if (ret) return ret; @@ -850,10 +850,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->get_eee) + if (!ds->ops->get_eee) return -EOPNOTSUPP; - ret = ds->drv->get_eee(ds, p->port, e); + ret = ds->ops->get_eee(ds, p->port, e); if (ret) return ret; @@ -988,8 +988,8 @@ static void dsa_slave_adjust_link(struct net_device *dev) p->old_pause = p->phy->pause; } - if (ds->drv->adjust_link && status_changed) - ds->drv->adjust_link(ds, p->port, p->phy); + if (ds->ops->adjust_link && status_changed) + ds->ops->adjust_link(ds, p->port, p->phy); if (status_changed) phy_print_status(p->phy); @@ -1004,8 +1004,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, if (dev) { p = netdev_priv(dev); ds = p->parent; - if (ds->drv->fixed_link_update) - ds->drv->fixed_link_update(ds, p->port, status); + if (ds->ops->fixed_link_update) + ds->ops->fixed_link_update(ds, p->port, status); } return 0; @@ -1062,8 +1062,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_dn = port_dn; } - if (ds->drv->get_phy_flags) - phy_flags = 
ds->drv->get_phy_flags(ds, p->port); + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, p->port); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e654d79..e94b47be0019 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - /* Check special setups for testing purpose to enable TFO w/o - * requiring TCP_FASTOPEN sockopt. + /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. - * Also fastopenq may already been allocated because this - * socket was in TCP_LISTEN state previously but was - * shutdown() (rather than close()). + * Also fastopen backlog may already been set via the option + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - fastopen_queue_tune(sk, backlog); - else if ((sysctl_tcp_fastopen & - TFO_SERVER_WO_SOCKOPT2) != 0) - fastopen_queue_tune(sk, - ((uint)sysctl_tcp_fastopen) >> 16); - + fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); } + err = inet_csk_listen_start(sk, backlog); if (err) goto out; @@ -921,6 +916,8 @@ const struct proto_ops inet_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ef2ebeb89d0f..317c31939732 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return NULL; switch (id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, tb); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; @@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old, { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, new); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; @@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - RCU_INIT_POINTER(net->ipv4.fib_local, NULL); RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 539fa264e67d..8066ccc48a17 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1576,7 +1576,8 @@ static bool fib_good_nh(const struct fib_nh *nh) rcu_read_lock_bh(); - n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw); + n = __ipv4_neigh_lookup_noref(nh->nh_dev, + (__force u32)nh->nh_gw); if (n) state = n->nud_state; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9b4ca87f70ba..606cc3e85d2b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, continue; } + 
/* Based on RFC3376 5.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->crcount)) && + (type == IGMPV3_ALLOW_NEW_SOURCES || + type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38c2c47fe0e8..abfbe492ebfe 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -45,6 +45,7 @@ struct inet_diag_entry { u16 family; u16 userlocks; u32 ifindex; + u32 mark; }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -580,6 +581,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc, yes = 0; break; } + case INET_DIAG_BC_MARK_COND: { + struct inet_diag_markcond *cond; + + cond = (struct inet_diag_markcond *)(op + 1); + if ((entry->mark & cond->mask) != cond->mark) + yes = 0; + break; + } } if (yes) { @@ -624,6 +633,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.dport = ntohs(inet->inet_dport); entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; + if (sk_fullsock(sk)) + entry.mark = sk->sk_mark; + else if (sk->sk_state == TCP_NEW_SYN_RECV) + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; + else + entry.mark = 0; return inet_diag_bc_run(bc, &entry); } @@ -706,10 +721,25 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op, return true; } -static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) +static bool valid_markcond(const struct inet_diag_bc_op *op, int len, + int *min_len) +{ + *min_len += sizeof(struct inet_diag_markcond); + return len >= *min_len; +} + +static int inet_diag_bc_audit(const struct nlattr *attr, + const struct sk_buff *skb) { - const void *bc = bytecode; - int len = bytecode_len; + bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); + const void *bytecode, *bc; + int bytecode_len, len; + + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) + return -EINVAL; + + bytecode = bc = nla_data(attr); + len = bytecode_len = nla_len(attr); while (len > 0) { int min_len = sizeof(struct inet_diag_bc_op); @@ -732,6 +762,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_MARK_COND: + if (!net_admin) + return -EPERM; + if (!valid_markcond(bc, len, &min_len)) + return -EINVAL; + break; case INET_DIAG_BC_AUTO: case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: @@ -1020,13 +1056,13 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr, skb); + if (err) + return err; } { struct netlink_dump_control c = { @@ -1051,13 +1087,13 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) h->nlmsg_flags & 
NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr, skb); + if (err) + return err; } { struct netlink_dump_control c = { diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1d71c40eaaf3..071a785c65eb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -85,7 +85,6 @@ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ #define CONF_SEND_RETRIES 6 /* Send six requests per open */ -#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */ #define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */ #define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ #define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */ @@ -188,7 +187,7 @@ struct ic_device { }; static struct ic_device *ic_first_dev __initdata; /* List of open device */ -static struct net_device *ic_dev __initdata; /* Selected device */ +static struct ic_device *ic_dev __initdata; /* Selected device */ static bool __init ic_is_init_dev(struct net_device *dev) { @@ -307,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev && !netdev_uses_dsa(dev)) { + if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } @@ -372,7 +371,7 @@ static int __init ic_setup_if(void) int err; memset(&ir, 0, sizeof(ir)); - strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", @@ -396,7 +395,7 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->name); + strcpy(ir.ifr_name, ic_dev->dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", @@ -568,7 +567,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock; /* We have a winner! */ - ic_dev = dev; + ic_dev = d; if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; @@ -655,8 +654,6 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; -static __be32 ic_dev_xid; /* Device under configuration */ - /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -666,14 +663,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 }; #ifdef IPCONFIG_DHCP static void __init -ic_dhcp_init_options(u8 *options) +ic_dhcp_init_options(u8 *options, struct ic_device *d) { u8 mt = ((ic_servaddr == NONE) ? 
DHCPDISCOVER : DHCPREQUEST); u8 *e = options; int len; - pr_debug("DHCP: Sending message type %d\n", mt); + pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -857,7 +854,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* add DHCP options or BOOTP extensions */ #ifdef IPCONFIG_DHCP if (ic_proto_enabled & IC_USE_DHCP) - ic_dhcp_init_options(b->exten); + ic_dhcp_init_options(b->exten, d); else #endif ic_bootp_init_ext(b->exten); @@ -1033,14 +1030,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply to our BOOTP request? */ if (b->op != BOOTP_REPLY || b->xid != d->xid) { - net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", - b->op, b->xid); - goto drop_unlock; - } - - /* Is it a reply for the device we are configuring? */ - if (b->xid != ic_dev_xid) { - net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); + net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n", + d->dev->name, b->op, b->xid); goto drop_unlock; } @@ -1075,7 +1066,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } } - pr_debug("DHCP: Got message type %d\n", mt); + pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name); switch (mt) { case DHCPOFFER: @@ -1130,7 +1121,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } /* We have a winner! */ - ic_dev = dev; + ic_dev = d; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; ic_addrservaddr = b->iph.saddr; @@ -1225,9 +1216,6 @@ static int __init ic_dynamic(void) timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { #ifdef IPCONFIG_BOOTP - /* Track the device we are configuring */ - ic_dev_xid = d->xid; - if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif @@ -1236,15 +1224,19 @@ static int __init ic_dynamic(void) ic_rarp_send_if(d); #endif - jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) - schedule_timeout_uninterruptible(1); + if (!d->next) { + jiff = jiffies + timeout; + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); + } #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) && (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; + /* continue on device that got the reply */ + d = ic_dev; pr_cont(","); continue; } @@ -1487,7 +1479,7 @@ static int __init ip_auto_config(void) #endif /* IPCONFIG_DYNAMIC */ } else { /* Device selected manually or only one device -> use it */ - ic_dev = ic_first_dev->dev; + ic_dev = ic_first_dev; } addr = root_nfs_parse_addr(root_server_path); @@ -1501,14 +1493,6 @@ static int __init ip_auto_config(void) return -1; /* - * Close all network devices except the device we've - * autoconfigured and set up routes. - */ - ic_close_devs(); - if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; - - /* * Record which protocol was actually used. 
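Two behavioural changes in ipconfig are worth restating: requests now go out on all candidate devices back to back, with a single timeout wait after the last one (the fixed one-second inter-device pause is gone), and when a DHCP round yields an offer but no DHCPACK yet, the next round resumes on the device that produced the reply. In outline (helper names invented for illustration):

	static void ic_one_round(struct ic_device *first, unsigned long timeout)
	{
		struct ic_device *d;

		for (d = first; d; d = d->next) {
			ic_send_request(d);		/* BOOTP/DHCP/RARP */
			if (!d->next)			/* single wait, at the end */
				ic_wait_for_reply(timeout);
		}
	}

Separately, ic_dev now points at the selected struct ic_device rather than the bare net_device, and ic_close_devs() moves after ic_setup_if()/ic_setup_routes() so the chosen device is still open while it is being configured.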
*/ #ifdef IPCONFIG_DYNAMIC @@ -1522,7 +1506,7 @@ static int __init ip_auto_config(void) pr_info("IP-Config: Complete:\n"); pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr, &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); @@ -1542,7 +1526,18 @@ static int __init ip_auto_config(void) pr_cont("\n"); #endif /* !SILENT */ - return 0; + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + err = -1; + else + err = 0; + + ic_close_devs(); + + return err; } late_initcall(ip_auto_config); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b63a927..1ed015e4bc79 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE), SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ffbb218de520..77311a92275c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1570,6 +1570,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); +int tcp_peek_len(struct socket *sock) +{ + return tcp_inq(sock->sk); +} +EXPORT_SYMBOL(tcp_peek_len); + /* * This routine copies from a sock struct into the user buffer. * @@ -3092,23 +3098,6 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) } EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - const struct tcphdr *th) -{ - struct scatterlist sg; - struct tcphdr hdr; - - /* We are not allowed to change tcphdr, make a local copy */ - memcpy(&hdr, th, sizeof(hdr)); - hdr.check = 0; - - /* options aren't included in the hash */ - sg_init_one(&sg, &hdr, sizeof(hdr)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr)); - return crypto_ahash_update(hp->md5_req); -} -EXPORT_SYMBOL(tcp_md5_hash_header); - int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const struct sk_buff *skb, unsigned int header_len) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b38bc3..8cd02c0b056c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4392,12 +4392,9 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, skb, size)) { + while (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - - if (!sk_rmem_schedule(sk, skb, size)) - return -1; } } return 0; @@ -4874,29 +4871,41 @@ static void tcp_collapse_ofo_queue(struct sock *sk) } /* - * Purge the out-of-order queue. - * Return true if queue was pruned. + * Clean the out-of-order queue to make room. + * We drop high sequences packets to : + * 1) Let a chance for holes to be filled. + * 2) not add too big latencies if thousands of packets sit there. + * (But if application shrinks SO_RCVBUF, we could still end up + * freeing whole queue here) + * + * Return true if queue has shrunk. 
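The new tcp_peek_len() above simply exposes tcp_inq() — the number of bytes already queued for reading — through struct proto_ops, so stream consumers such as the strparser added elsewhere in this series can size their reads without consuming data. The userspace analogue is the FIONREAD ioctl; a self-contained demo on a socketpair:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            int sv[2], queued = 0;

            if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                    return 1;
            write(sv[0], "hello", 5);
            if (ioctl(sv[1], FIONREAD, &queued) == 0)       /* peek, don't consume */
                    printf("%d bytes queued\n", queued);    /* prints 5 */
            close(sv[0]);
            close(sv[1]);
            return 0;
    }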
*/ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - bool res = false; + struct sk_buff *skb; - if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - __skb_queue_purge(&tp->out_of_order_queue); + if (skb_queue_empty(&tp->out_of_order_queue)) + return false; - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + + while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) { + tcp_drop(sk, skb); sk_mem_reclaim(sk); - res = true; + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; } - return res; + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + return true; } /* Reduce allocated memory if we can, trying to get diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7158d4f8dae4..a75bf48d7950 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1175,6 +1175,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", &iph->saddr, ntohs(th->source), &iph->daddr, ntohs(th->dest), @@ -1537,6 +1538,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_prequeue); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; + + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ + limit += 64*1024; + + /* In case all data was pulled from skb frags (in __pskb_pull_tail()), + * we can fix skb->truesize to its real value to avoid future drops. + * This is valid because skb is not yet charged to the socket. + * It has been noticed pure SACK packets were sometimes dropped + * (if cooked by drivers without copybreak feature). 
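The rewritten tcp_prune_ofo_queue() above stops purging the whole out-of-order queue: it drops entries from the tail — the highest sequence numbers — only until rmem is back under sk_rcvbuf and memory pressure clears, leaving earlier segments in place so holes can still be filled. The "trim newest until under budget" loop, reduced to a standalone sketch with made-up sizes:

    #include <stdio.h>

    int main(void)
    {
            unsigned truesize[] = { 300, 300, 300, 300 };   /* queued, oldest first */
            unsigned used = 1200, budget = 1000;
            int tail = 4;

            while (tail > 0 && used > budget) {             /* drop newest first */
                    tail--;
                    used -= truesize[tail];
                    printf("dropped entry %d, used now %u\n", tail, used);
            }
            return 0;
    }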
+ */ + if (!skb->data_len) + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return true; + } + return false; +} +EXPORT_SYMBOL(tcp_add_backlog); + /* * From tcp_input.c */ @@ -1608,6 +1637,7 @@ process: sk = req->rsk_listener; if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } @@ -1666,10 +1696,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7fd6e47..8b45794eb6b2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2776,7 +2776,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; + __u8 sacked; int segs; if (skb == tcp_send_head(sk)) @@ -2788,6 +2788,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + sacked = TCP_SKB_CB(skb)->sacked; /* In case tcp_shift_skb_data() have aggregated large skbs, * we need to make sure not sending too bigs TSO packets */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5fdcb8d108d4..058c31286ce1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,6 +114,7 @@ #include <net/busy_poll.h> #include "udp_impl.h" #include <net/sock_reuseport.h> +#include <net/addrconf.h> struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2192,6 +2193,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(udp_poll); +int udp_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(udp_abort); + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -2221,7 +2236,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 3d5ccf4b1412..8a9f6e535caa 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -165,12 +165,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_wqueue = sk_wmem_alloc_get(sk); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int __udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req, + struct udp_table *tbl) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + int err; + + rcu_read_lock(); + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, 
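tcp_add_backlog() above folds the admission check into one helper: the budget is sk_rcvbuf + sk_sndbuf plus 64 KB of headroom, applied after optionally shrinking a fully-linear skb's truesize to its real buffer size so pure ACK/SACK packets are not dropped for memory they no longer hold. A hedged model of the accounting:

    #include <stdbool.h>
    #include <stdio.h>

    struct sk_model {
            unsigned rcvbuf, sndbuf;
            unsigned backlog;       /* truesize already charged to the backlog */
    };

    static bool add_backlog(struct sk_model *sk, unsigned truesize)
    {
            unsigned limit = sk->rcvbuf + sk->sndbuf + 64 * 1024;   /* headroom */

            if (sk->backlog + truesize > limit)
                    return false;   /* caller drops and bumps TCPBACKLOGDROP */
            sk->backlog += truesize;
            return true;
    }

    int main(void)
    {
            struct sk_model sk = { 87380, 16384, 0 };

            printf("%s\n", add_backlog(&sk, 2048) ? "queued" : "dropped");
            return 0;
    }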
+ req->id.idiag_if, tbl, NULL); + + else + sk = __udp6_lib_lookup(net, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + } +#endif + else { + rcu_read_unlock(); + return -EINVAL; + } + + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + + rcu_read_unlock(); + + if (!sk) + return -ENOENT; + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_put(sk); + return -ENOENT; + } + + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_put(sk); + + return err; +} + +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udp_table); +} + +static int udplite_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udplite_table); +} + +#endif + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udp_diag_destroy, +#endif }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -192,6 +268,9 @@ static const struct inet_diag_handler udplite_diag_handler = { .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udplite_diag_destroy, +#endif }; static int __init udp_diag_init(void) diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2eea073e27ef..af817158d830 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -60,7 +60,6 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b454055ba625..46ad699937fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index ec9efbcdad35..aba0998ddbfb 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -172,6 +172,5 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); -MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 704274cbd495..397e1ed3daa3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,12 +61,12 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_GRE_HASH_SIZE_SHIFT 5 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) static int ip6gre_net_id __read_mostly; struct ip6gre_net { - struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct net_device *fb_tunnel_dev; }; @@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); will match 
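__udp_diag_destroy() above routes an AF_INET6 request through the IPv4 lookup when both endpoints are v4-mapped (::ffff:a.b.c.d). The test behind ipv6_addr_v4mapped() amounts to a 12-byte prefix compare; a standalone version:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* ::ffff:a.b.c.d — ten zero bytes, then 0xff 0xff, then the IPv4 address */
    static bool addr_is_v4mapped(const uint8_t a[16])
    {
            static const uint8_t prefix[12] =
                    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };

            return memcmp(a, prefix, sizeof(prefix)) == 0;
    }

    int main(void)
    {
            uint8_t mapped[16] =
                    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 0, 2, 1 };

            printf("%s\n", addr_is_v4mapped(mapped) ? "v4-mapped" : "native v6");
            return 0;
    }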
fallback tunnel. */ -#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) static u32 HASH_ADDR(const struct in6_addr *addr) { u32 hash = ipv6_addr_hash(addr); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); } #define tunnels_r_l tunnels[3] @@ -1087,7 +1087,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; t = rtnl_dereference(ign->tunnels[prio][h]); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7b0481e3738f..2050217df565 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,8 +64,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_TUNNEL_HASH_SIZE_SHIFT 5 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); @@ -87,7 +87,7 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) if (dev->rtnl_link_ops == &ip6_link_ops) unregister_netdevice_queue(dev, &list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f14040..cc7e05898307 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -50,14 +50,14 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_VTI_HASH_SIZE_SHIFT 5 +#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } static int vti6_dev_init(struct net_device *dev); @@ -69,7 +69,7 @@ struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) struct ip6_tnl *t; LIST_HEAD(list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, &list); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d64ee7e83664..75c1fc54f188 100644 --- a/net/ipv6/mcast.c +++ 
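The IP6_GRE_/IP6_TUNNEL_/IP6_VTI_HASH_SIZE renames above exist to stop the tunnel drivers from clashing with the generic HASH_SIZE name used elsewhere in the tree; the bucket math is untouched: hash_32() folds a 32-bit hash down to the top "shift" bits. A minimal stand-in for that bucketing:

    #include <stdint.h>
    #include <stdio.h>

    #define TUNNEL_HASH_SHIFT 5
    #define TUNNEL_HASH_SIZE  (1u << TUNNEL_HASH_SHIFT)

    /* Same shape as the kernel's hash_32(): multiply by a 32-bit golden-ratio
     * constant, keep the top TUNNEL_HASH_SHIFT bits. */
    static uint32_t bucket_of(uint32_t hash)
    {
            return (hash * 0x61C88647u) >> (32 - TUNNEL_HASH_SHIFT);
    }

    int main(void)
    {
            printf("bucket %u of %u\n", bucket_of(0xdeadbeef), TUNNEL_HASH_SIZE);
            return 0;
    }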
b/net/ipv6/mcast.c @@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, continue; } + /* Based on RFC3810 6.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->mca_crcount)) && + (type == MLD2_ALLOW_NEW_SOURCES || + type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 182b6a9be29d..b1cdf8009d29 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,7 +62,7 @@ For comments look at net/ipv4/ip_gre.c --ANK */ -#define HASH_SIZE 16 +#define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; @@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; @@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } #endif -bool ipip6_valid_ip_proto(u8 ipproto) +static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || @@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, for (prio = 1; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94f4f89d73e7..04529a3d42cb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? 
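The mcast.c fix above follows RFC 3810 section 6.1: while a filter-mode change is still being retransmitted, ALLOW_NEW_SOURCES/BLOCK_OLD_SOURCES source-list records are suppressed and only their retransmit counters decremented. The guard condition, restated as a standalone predicate (field names mirror the kernel's; the record-type values follow RFC 3810):

    #include <stdbool.h>
    #include <stdio.h>

    enum { MLD2_ALLOW_NEW_SOURCES = 5, MLD2_BLOCK_OLD_SOURCES = 6 };
    enum { MCAST_EXCLUDE = 0 };

    static bool suppress_source_change(bool gdeleted, int mca_sfmode,
                                       int mca_crcount, int type, int sf_crcount)
    {
            bool mode_change_pending = (gdeleted && mca_sfmode == MCAST_EXCLUDE) ||
                                       (!gdeleted && mca_crcount);

            return mode_change_pending &&
                   (type == MLD2_ALLOW_NEW_SOURCES ||
                    type == MLD2_BLOCK_OLD_SOURCES) && sf_crcount;
    }

    int main(void)
    {
            printf("%d\n", suppress_source_change(false, MCAST_EXCLUDE, 2,
                                                  MLD2_ALLOW_NEW_SOURCES, 1)); /* 1 */
            return 0;
    }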
"failed" : "mismatch", &ip6h->saddr, ntohs(th->source), @@ -1415,6 +1416,7 @@ process: sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } @@ -1471,10 +1473,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); @@ -1868,17 +1867,6 @@ void tcp6_proc_exit(struct net *net) } #endif -static void tcp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -1920,7 +1908,6 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif - .clear_sk = tcp_v6_clear_sk, .diag_destroy = tcp_abort, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 19ac3a1c308d..9aa7c1c7a9ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net) } #endif /* CONFIG_PROC_FS */ -void udp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { @@ -1465,7 +1454,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0682c031ccdc..f6eb1ab34f4b 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); -void udp_v6_clear_sk(struct sock *sk, int size); - #ifdef CONFIG_PROC_FS int udp6_seq_show(struct seq_file *seq, void *v); #endif diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fd6ef414899b..47d0d2b87106 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,7 +55,6 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, }; static struct inet_protosw udplite6_protosw = { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9b491d..db639690c205 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -845,9 +845,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (sock->state != SS_UNCONNECTED) goto out; - if ((sk = sock->sk) == NULL) - goto out; - err = -EOPNOTSUPP; if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && (sk->sk_type != SOCK_DGRAM)) diff --git 
a/net/kcm/Kconfig b/net/kcm/Kconfig index 5db94d940ecc..87fca36e6c47 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -3,6 +3,7 @@ config AF_KCM tristate "KCM sockets" depends on INET select BPF_SYSCALL + select STREAM_PARSER ---help--- KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 16c2e03bd388..bf75c9231cca 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->stats.rx_msgs, - psock->stats.rx_bytes, + psock->strp.stats.rx_msgs, + psock->strp.stats.rx_bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,14 +170,27 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->rx_stopped) + if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); - if (psock->ready_rx_msg) - seq_puts(seq, "RdyRx "); + if (!psock->strp.rx_paused && !psock->ready_rx_msg) { + if (psock->sk->sk_receive_queue.qlen) { + if (psock->strp.rx_need_bytes) + seq_printf(seq, "RxWait=%u ", + psock->strp.rx_need_bytes); + else + seq_printf(seq, "RxWait "); + } + } else { + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + + if (psock->ready_rx_msg) + seq_puts(seq, "RdyRx "); + } seq_puts(seq, "\n"); } @@ -275,6 +288,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; + struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; @@ -282,20 +296,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); + memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); + aggregate_strp_stats(&knet->aggregate_strp_stats, + &strp_stats); list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); - list_for_each_entry(psock, &mux->psocks, psock_list) + aggregate_strp_stats(&mux->aggregate_strp_stats, + &strp_stats); + list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); + save_strp_stats(&psock->strp, &strp_stats); + } + spin_unlock_bh(&mux->lock); } @@ -328,7 +350,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) mux_stats.rx_ready_drops); seq_printf(seq, - "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", @@ -337,6 +359,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "Reserved", "Unreserved", "RX-Aborts", + "RX-Intr", + "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", @@ -345,20 +369,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "TX-Aborts"); seq_printf(seq, - "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u 
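With AF_KCM now selecting STREAM_PARSER (above), message framing moves out of kcmsock.c into the shared strparser: a parse callback — for KCM, the attached BPF program — inspects the bytes accumulated so far and returns the full frame length, 0 for "need more data", or a negative error. A plain-C sketch of such a parser for an assumed 4-byte big-endian length-prefixed protocol (the wire format is an assumption for illustration):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int parse_frame(const uint8_t *data, size_t avail)
    {
            uint32_t body;

            if (avail < 4)
                    return 0;                       /* need more header bytes */
            memcpy(&body, data, sizeof(body));
            body = ntohl(body);
            if (body > (1u << 20))
                    return -1;                      /* message too big, abort stream */
            return 4 + (int)body;                   /* header + payload */
    }

    int main(void)
    {
            uint8_t buf[8] = { 0, 0, 0, 4, 'a', 'b', 'c', 'd' };

            printf("frame length: %d\n", parse_frame(buf, sizeof(buf)));    /* 8 */
            return 0;
    }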
%-10u %-10u %-10u %-10u %-10u\n", + "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - psock_stats.rx_msgs, - psock_stats.rx_bytes, + strp_stats.rx_msgs, + strp_stats.rx_bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - psock_stats.rx_aborts, - psock_stats.rx_mem_fail, - psock_stats.rx_need_more_hdr, - psock_stats.rx_bad_hdr_len, - psock_stats.rx_msg_too_big, - psock_stats.rx_msg_timeouts, + strp_stats.rx_aborts, + strp_stats.rx_interrupted, + strp_stats.rx_unrecov_intr, + strp_stats.rx_mem_fail, + strp_stats.rx_need_more_hdr, + strp_stats.rx_bad_hdr_len, + strp_stats.rx_msg_too_big, + strp_stats.rx_msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cb39e05b166c..2632ac748371 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1,3 +1,13 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + #include <linux/bpf.h> #include <linux/errno.h> #include <linux/errqueue.h> @@ -16,7 +26,6 @@ #include <net/kcm.h> #include <net/netns/generic.h> #include <net/sock.h> -#include <net/tcp.h> #include <uapi/linux/kcm.h> unsigned int kcm_net_id; @@ -35,38 +44,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) return (struct kcm_tx_msg *)skb->cb; } -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -{ - return (struct kcm_rx_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); -} - static void report_csk_error(struct sock *csk, int err) { csk->sk_err = EPIPE; csk->sk_error_report(csk); } -/* Callback lock held */ -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, - struct sk_buff *skb) -{ - struct sock *csk = psock->sk; - - /* Unrecoverable error in receive */ - - del_timer(&psock->rx_msg_timer); - - if (psock->rx_stopped) - return; - - psock->rx_stopped = 1; - KCM_STATS_INCR(psock->stats.rx_aborts); - - /* Report an error on the lower socket */ - report_csk_error(csk, err); -} - static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, bool wakeup_kcm) { @@ -109,12 +92,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { - KCM_STATS_ADD(mux->stats.rx_bytes, - psock->stats.rx_bytes - psock->saved_rx_bytes); + STRP_STATS_ADD(mux->stats.rx_bytes, + psock->strp.stats.rx_bytes - + psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->stats.rx_msgs; - psock->saved_rx_bytes = psock->stats.rx_bytes; + psock->strp.stats.rx_msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.rx_msgs; + psock->saved_rx_bytes = psock->strp.stats.rx_bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -167,11 +151,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) */ list_del(&psock->psock_ready_list); psock->ready_rx_msg = NULL; - /* Commit clearing of ready_rx_msg for queuing work */ smp_mb(); - queue_work(kcm_wq, &psock->rx_work); + strp_unpause(&psock->strp); + strp_check_rcv(&psock->strp); } /* Buffer limit is okay now, add to ready list */ @@ -285,6 +269,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, if 
(list_empty(&mux->kcm_rx_waiters)) { psock->ready_rx_msg = head; + strp_pause(&psock->strp); list_add_tail(&psock->psock_ready_list, &mux->psocks_ready); spin_unlock_bh(&mux->rx_lock); @@ -353,346 +338,60 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_unlock_bh(&mux->rx_lock); } -static void kcm_start_rx_timer(struct kcm_psock *psock) -{ - if (psock->sk->sk_rcvtimeo) - mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo); -} - -/* Macro to invoke filter function. */ -#define KCM_RUN_FILTER(prog, ctx) \ - (*prog->bpf_func)(ctx, prog->insnsi) - -/* Lower socket lock held */ -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) -{ - struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; - struct kcm_rx_msg *rxm; - struct kcm_sock *kcm; - struct sk_buff *head, *skb; - size_t eaten = 0, cand_len; - ssize_t extra; - int err; - bool cloned_orig = false; - - if (psock->ready_rx_msg) - return 0; - - head = psock->rx_skb_head; - if (head) { - /* Message already in progress */ - - rxm = kcm_rx_msg(head); - if (unlikely(rxm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; - - return eaten; - } - - if (unlikely(orig_offset)) { - /* Getting data with a non-zero offset when a message is - * in progress is not expected. If it does happen, we - * need to clone and pull since we can't deal with - * offsets in the skbs for a message expect in the head. - */ - orig_skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!orig_skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - if (!pskb_pull(orig_skb, orig_offset)) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - kfree_skb(orig_skb); - desc->error = -ENOMEM; - return 0; - } - cloned_orig = true; - orig_offset = 0; - } - - if (!psock->rx_skb_nextp) { - /* We are going to append to the frags_list of head. - * Need to unshare the frag_list. - */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - return 0; - } - - if (unlikely(skb_shinfo(head)->frag_list)) { - /* We can't append to an sk_buff that already - * has a frag_list. We create a new head, point - * the frag_list of that to the old head, and - * then are able to use the old head->next for - * appending to the message. 
- */ - if (WARN_ON(head->next)) { - desc->error = -EINVAL; - return 0; - } - - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *kcm_rx_msg(skb) = *kcm_rx_msg(head); - psock->rx_skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; - psock->rx_skb_head = skb; - head = skb; - } else { - psock->rx_skb_nextp = - &skb_shinfo(head)->frag_list; - } - } - } - - while (eaten < orig_len) { - /* Always clone since we will consume something */ - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - break; - } - - cand_len = orig_len - eaten; - - head = psock->rx_skb_head; - if (!head) { - head = skb; - psock->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - psock->rx_skb_nextp = NULL; - rxm = kcm_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->offset = orig_offset + eaten; - } else { - /* Unclone since we may be appending to an skb that we - * already share a frag_list with. - */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - break; - } - - rxm = kcm_rx_msg(head); - *psock->rx_skb_nextp = skb; - psock->rx_skb_nextp = &skb->next; - head->data_len += skb->len; - head->len += skb->len; - head->truesize += skb->truesize; - } - - if (!rxm->full_len) { - ssize_t len; - - len = KCM_RUN_FILTER(psock->bpf_prog, head); - - if (!len) { - /* Need more header to determine length */ - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - rxm->accum_len += cand_len; - eaten += cand_len; - KCM_STATS_INCR(psock->stats.rx_need_more_hdr); - WARN_ON(eaten != orig_len); - break; - } else if (len > psock->sk->sk_rcvbuf) { - /* Message length exceeds maximum allowed */ - KCM_STATS_INCR(psock->stats.rx_msg_too_big); - desc->error = -EMSGSIZE; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EMSGSIZE, head); - break; - } else if (len <= (ssize_t)head->len - - skb->len - rxm->offset) { - /* Length must be into new skb (and also - * greater than zero) - */ - KCM_STATS_INCR(psock->stats.rx_bad_hdr_len); - desc->error = -EPROTO; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EPROTO, head); - break; - } - - rxm->full_len = len; - } - - extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; - - if (extra < 0) { - /* Message not complete yet. */ - if (rxm->full_len - rxm->accum_len > - tcp_inq(psock->sk)) { - /* Don't have the whole messages in the socket - * buffer. Set psock->rx_need_bytes to wait for - * the rest of the message. Also, set "early - * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. - */ - - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - - psock->rx_need_bytes = rxm->full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - KCM_STATS_ADD(psock->stats.rx_bytes, cand_len); - desc->count = 0; /* Stop reading socket */ - break; - } - rxm->accum_len += cand_len; - eaten += cand_len; - WARN_ON(eaten != orig_len); - break; - } - - /* Positive extra indicates ore bytes than needed for the - * message - */ - - WARN_ON(extra > cand_len); - - eaten += (cand_len - extra); - - /* Hurray, we have a new message! 
*/ - del_timer(&psock->rx_msg_timer); - psock->rx_skb_head = NULL; - KCM_STATS_INCR(psock->stats.rx_msgs); - -try_queue: - kcm = reserve_rx_kcm(psock, head); - if (!kcm) { - /* Unable to reserve a KCM, message is held in psock. */ - break; - } - - if (kcm_queue_rcv_skb(&kcm->sk, head)) { - /* Should mean socket buffer full */ - unreserve_rx_kcm(psock, false); - goto try_queue; - } - } - - if (cloned_orig) - kfree_skb(orig_skb); - - KCM_STATS_ADD(psock->stats.rx_bytes, eaten); - - return eaten; -} - -/* Called with lock held on lower socket */ -static int psock_tcp_read_sock(struct kcm_psock *psock) -{ - read_descriptor_t desc; - - desc.arg.data = psock; - desc.error = 0; - desc.count = 1; /* give more than one skb per call */ - - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); - - unreserve_rx_kcm(psock, true); - - return desc.error; -} - /* Lower sock lock held */ -static void psock_tcp_data_ready(struct sock *sk) +static void psock_data_ready(struct sock *sk) { struct kcm_psock *psock; read_lock_bh(&sk->sk_callback_lock); psock = (struct kcm_psock *)sk->sk_user_data; - if (unlikely(!psock || psock->rx_stopped)) - goto out; + if (likely(psock)) + strp_data_ready(&psock->strp); - if (psock->ready_rx_msg) - goto out; - - if (psock->rx_need_bytes) { - if (tcp_inq(sk) >= psock->rx_need_bytes) - psock->rx_need_bytes = 0; - else - goto out; - } - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); - -out: read_unlock_bh(&sk->sk_callback_lock); } -static void do_psock_rx_work(struct kcm_psock *psock) +/* Called with lower sock held */ +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) { - read_descriptor_t rd_desc; - struct sock *csk = psock->sk; - - /* We need the read lock to synchronize with psock_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. - */ - lock_sock(csk); - read_lock_bh(&csk->sk_callback_lock); - - if (unlikely(csk->sk_user_data != psock)) - goto out; - - if (unlikely(psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - rd_desc.arg.data = psock; + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct kcm_sock *kcm; - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); +try_queue: + kcm = reserve_rx_kcm(psock, skb); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock and strp + * is paused. 
+ */ + return; + } -out: - read_unlock_bh(&csk->sk_callback_lock); - release_sock(csk); + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } } -static void psock_rx_work(struct work_struct *w) +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { - do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct bpf_prog *prog = psock->bpf_prog; + + return (*prog->bpf_func)(skb, prog->insnsi); } -static void psock_rx_delayed_work(struct work_struct *w) +static int kcm_read_sock_done(struct strparser *strp, int err) { - do_psock_rx_work(container_of(w, struct kcm_psock, - rx_delayed_work.work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + + unreserve_rx_kcm(psock, true); + + return err; } -static void psock_tcp_state_change(struct sock *sk) +static void psock_state_change(struct sock *sk) { /* TCP only does a POLLIN for a half close. Do a POLLHUP here * since application will normally not poll with POLLIN @@ -702,7 +401,7 @@ static void psock_tcp_state_change(struct sock *sk) report_csk_error(sk, EPIPE); } -static void psock_tcp_write_space(struct sock *sk) +static void psock_write_space(struct sock *sk) { struct kcm_psock *psock; struct kcm_mux *mux; @@ -713,14 +412,13 @@ static void psock_tcp_write_space(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (unlikely(!psock)) goto out; - mux = psock->mux; spin_lock_bh(&mux->lock); /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm) + if (kcm && !unlikely(kcm->tx_stopped)) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1411,7 +1109,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int copied = 0; struct sk_buff *skb; @@ -1425,7 +1123,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1481,7 +1179,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1498,7 +1196,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1674,15 +1372,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->rx_lock); } -static void kcm_rx_msg_timeout(unsigned long arg) -{ - struct kcm_psock *psock = (struct kcm_psock *)arg; - - /* Message assembly timed out */ - KCM_STATS_INCR(psock->stats.rx_msg_timeouts); - kcm_abort_rx_psock(psock, ETIMEDOUT, NULL); -} - static int kcm_attach(struct socket *sock, struct socket *csock, struct bpf_prog *prog) { @@ -1692,19 +1381,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; - - if (csock->ops->family != PF_INET && - csock->ops->family != PF_INET6) - return -EINVAL; + struct strp_callbacks cb; + int err; csk = csock->sk; if (!csk) 
return -EINVAL; - /* Only support TCP for now */ - if (csk->sk_protocol != IPPROTO_TCP) - return -EINVAL; - psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; @@ -1713,11 +1396,16 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout, - (unsigned long)psock); + cb.rcv_msg = kcm_rcv_strparser; + cb.abort_parser = NULL; + cb.parse_msg = kcm_parse_func_strparser; + cb.read_sock_done = kcm_read_sock_done; - INIT_WORK(&psock->rx_work, psock_rx_work); - INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + err = strp_init(&psock->strp, csk, &cb); + if (err) { + kmem_cache_free(kcm_psockp, psock); + return err; + } sock_hold(csk); @@ -1726,9 +1414,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; csk->sk_user_data = psock; - csk->sk_data_ready = psock_tcp_data_ready; - csk->sk_write_space = psock_tcp_write_space; - csk->sk_state_change = psock_tcp_state_change; + csk->sk_data_ready = psock_data_ready; + csk->sk_write_space = psock_write_space; + csk->sk_state_change = psock_state_change; write_unlock_bh(&csk->sk_callback_lock); /* Finished initialization, now add the psock to the MUX. */ @@ -1750,7 +1438,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, spin_unlock_bh(&mux->lock); /* Schedule RX work in case there are already bytes queued */ - queue_work(kcm_wq, &psock->rx_work); + strp_check_rcv(&psock->strp); return 0; } @@ -1790,6 +1478,8 @@ static void kcm_unattach(struct kcm_psock *psock) struct sock *csk = psock->sk; struct kcm_mux *mux = psock->mux; + lock_sock(csk); + /* Stop getting callbacks from TCP socket. After this there should * be no way to reserve a kcm for this psock. 
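kcm_attach() above now fills a struct strp_callbacks — rcv_msg to deliver a completed message, parse_msg to compute frame length, read_sock_done to run after each read pass — and hands it to strp_init(). The underlying pattern is a function-pointer table; a self-contained sketch (the struct and names below are illustrative, not the strparser API):

    #include <stdio.h>

    struct parser;

    struct parser_callbacks {
            int  (*parse_msg)(struct parser *p, const char *data, int len);
            void (*rcv_msg)(struct parser *p, const char *msg, int len);
    };

    struct parser {
            struct parser_callbacks cb;
    };

    static int my_parse(struct parser *p, const char *data, int len)
    {
            (void)p; (void)data;
            return len;                     /* trivially: one buffer = one message */
    }

    static void my_rcv(struct parser *p, const char *msg, int len)
    {
            (void)p;
            printf("got %.*s (%d bytes)\n", len, msg, len);
    }

    int main(void)
    {
            struct parser p = { { my_parse, my_rcv } };
            const char *data = "ping";
            int n = p.cb.parse_msg(&p, data, 4);

            if (n > 0)
                    p.cb.rcv_msg(&p, data, n);
            return 0;
    }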
*/ @@ -1798,7 +1488,7 @@ static void kcm_unattach(struct kcm_psock *psock) csk->sk_data_ready = psock->save_data_ready; csk->sk_write_space = psock->save_write_space; csk->sk_state_change = psock->save_state_change; - psock->rx_stopped = 1; + strp_stop(&psock->strp); if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); @@ -1821,18 +1511,17 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); - del_timer_sync(&psock->rx_msg_timer); - cancel_work_sync(&psock->rx_work); - cancel_delayed_work_sync(&psock->rx_delayed_work); + /* Call strp_done without sock lock */ + release_sock(csk); + strp_done(&psock->strp); + lock_sock(csk); bpf_prog_put(psock->bpf_prog); - kfree_skb(psock->rx_skb_head); - psock->rx_skb_head = NULL; - spin_lock_bh(&mux->lock); aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); + save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); KCM_STATS_INCR(mux->stats.psock_unattach); @@ -1875,6 +1564,8 @@ no_reserved: fput(csk->sk_socket->file); kmem_cache_free(kcm_psockp, psock); } + + release_sock(csk); } static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) @@ -1915,6 +1606,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); + /* Lower socket lock should already be held */ kcm_unattach(psock); err = 0; @@ -2072,6 +1764,8 @@ static void release_mux(struct kcm_mux *mux) aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &knet->aggregate_psock_stats); + aggregate_strp_stats(&mux->aggregate_strp_stats, + &knet->aggregate_strp_stats); list_del_rcu(&mux->kcm_mux_list); knet->count--; mutex_unlock(&knet->mutex); @@ -2151,6 +1845,13 @@ static int kcm_release(struct socket *sock) * it will just return. */ __skb_queue_purge(&sk->sk_write_queue); + + /* Set tx_stopped. This is checked when psock is bound to a kcm and we + * get a writespace callback. This prevents further work being queued + * from the callback (unbinding the psock occurs after canceling work. 
+ */ + kcm->tx_stopped = 1; + release_sock(sk); spin_lock_bh(&mux->lock); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 232cb92033e8..34eff77982cf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) if (!pskb_may_pull(skb, 2)) return 1; - if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI)) skb_pull(skb, 2); return 0; @@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { - static const unsigned char ppph[2] = { 0xff, 0x03 }; struct sock *sk = sock->sk; struct sk_buff *skb; int error; @@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, error = -ENOMEM; skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len + - sizeof(ppph) + total_len, + 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 0, GFP_KERNEL); if (!skb) goto error_put_sess_tun; @@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, skb_reserve(skb, uhlen); /* Add PPP header */ - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; skb_put(skb, 2); /* Copy user data into skb */ @@ -369,7 +368,6 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - static const u8 ppph[2] = { 0xff, 0x03 }; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; struct l2tp_session *session; @@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) sizeof(struct iphdr) + /* IP header */ uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */ session->hdr_len + /* L2TP header */ - sizeof(ppph); /* PPP header */ + 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; /* Setup PPP header */ - __skb_push(skb, sizeof(ppph)); - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + __skb_push(skb, 2); + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); @@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); if (sock) { - inet_shutdown(sock, 2); + inet_shutdown(sock, SEND_SHUTDOWN); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ba5fc1f01e53..42a41ae405ba 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1088,13 +1088,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, } static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, - struct ieee80211_sta *sta) + struct sta_info *sta) { u32 ret = 0; - trace_drv_get_expected_throughput(sta); - if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(&local->hw, sta); + trace_drv_get_expected_throughput(&sta->sta); + if (local->ops->get_expected_throughput && sta->uploaded) + ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); trace_drv_return_u32(local, ret); return ret; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bde835f..fa7d37cf0351 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,22 +326,33 @@ static u32 
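The l2tp_ppp.c hunks above swap the bare 0xff/0x03 bytes for PPP_ALLSTATIONS and PPP_UI from <linux/ppp_defs.h> — the PPP address ("all stations") and control ("unnumbered information") fields that prefix each frame. Building that 2-byte header in a standalone sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define PPP_ALLSTATIONS 0xff    /* address: all stations */
    #define PPP_UI          0x03    /* control: unnumbered information */

    int main(void)
    {
            uint8_t frame[2 + 4];

            frame[0] = PPP_ALLSTATIONS;     /* was the magic ppph[0] */
            frame[1] = PPP_UI;              /* was the magic ppph[1] */
            memcpy(frame + 2, "data", 4);   /* payload follows the header */

            printf("%02x %02x %.4s\n", frame[0], frame[1], (char *)frame + 2);
            return 0;
    }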
airtime_link_metric_get(struct ieee80211_local *local, u32 tx_time, estimated_retx; u64 result; - if (sta->mesh->fail_avg >= 100) - return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. + * Rate is returned in units of Kbps, correct this + * to comply with airtime calculation units + * Round up in case we get rate < 100Kbps + */ + rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - if (WARN_ON(!rate)) - return MAX_METRIC; + if (rate) { + err = 0; + } else { + if (sta->mesh->fail_avg >= 100) + return MAX_METRIC; - err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + rate = cfg80211_calculate_bitrate(&rinfo); + if (WARN_ON(!rate)) + return MAX_METRIC; + + err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + } /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); - result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; + result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT); return (u32)result; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737dcc36f..19f14c907d74 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2279,11 +2279,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); - /* check if the driver has a SW RC implementation */ - if (ref && ref->ops->get_expected_throughput) - thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); - else - thr = drv_get_expected_throughput(local, &sta->sta); + thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); @@ -2291,6 +2287,25 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } +u32 sta_get_expected_throughput(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = NULL; + u32 thr = 0; + + if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + ref = local->rate_ctrl; + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, sta); + + return thr; +} + unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b0ef32dddd..0556be3e3628 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -712,6 +712,8 @@ void sta_set_rate_info_tx(struct sta_info *sta, struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); +u32 sta_get_expected_throughput(struct sta_info *sta); + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 502396694f47..1d0746dfea57 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2334,7 +2334,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct mesh_path 
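The hunk above makes the mesh airtime metric prefer the rate-control algorithm's expected throughput — returned in Kbps, hence DIV_ROUND_UP(..., 100) to get the 100 Kbps units the formula wants — and fall back to the last TX rate plus fail average only when that is unavailable. The fixed-point computation in the same shape, but with illustrative constants (the real ones live in mesh_hwmp.c); fail_avg must stay below 100 to avoid a zero divisor:

    #include <stdint.h>
    #include <stdio.h>

    #define ARITH_SHIFT     8               /* fixed-point scale, as in mesh_hwmp.c */
    #define DEVICE_CONSTANT 100             /* illustrative */
    #define TEST_FRAME_LEN  8192            /* illustrative */
    #define S_UNIT          (1 << ARITH_SHIFT)

    static uint32_t airtime_metric(uint32_t rate_100kbps, uint32_t fail_avg_pct)
    {
            uint32_t err = (fail_avg_pct << ARITH_SHIFT) / 100;
            uint32_t tx_time = DEVICE_CONSTANT + 10 * TEST_FRAME_LEN / rate_100kbps;
            uint64_t retx = (1ull << (2 * ARITH_SHIFT)) / (S_UNIT - err);

            return (uint32_t)((tx_time * retx) >> (2 * ARITH_SHIFT));
    }

    int main(void)
    {
            /* 54 Mbps link (540 x 100 Kbps), 10% failure average */
            printf("metric = %u\n", airtime_metric(540, 10));
            return 0;
    }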
__maybe_unused *mppath = NULL, *mpath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; - int nh_pos, h_pos; bool wme_sta = false, authorized = false; bool tdls_peer; bool multicast; @@ -2640,13 +2639,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, encaps_len = 0; } - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); /* @@ -2672,18 +2665,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } } - if (encaps_data) { + if (encaps_data) memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } #ifdef CONFIG_MAC80211_MESH - if (meshhdrlen > 0) { + if (meshhdrlen > 0) memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); - nh_pos += meshhdrlen; - h_pos += meshhdrlen; - } #endif if (ieee80211_is_data_qos(fc)) { @@ -2699,15 +2686,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } else memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); @@ -4390,9 +4369,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ac = ieee802_1d_to_ac[tid & 7]; skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_set_queue_mapping(skb, ac); skb->priority = tid; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 8dd836a8dd60..3e3e2534478a 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -63,43 +63,75 @@ out_nlmsg_trim: static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, int protocol, int s_num) { + struct rhashtable_iter *hti = (void *)cb->args[2]; struct netlink_table *tbl = &nl_table[protocol]; - struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; struct sock *sk; - int ret = 0, num = 0, i; + int num = 2; + int ret = 0; req = nlmsg_data(cb->nlh); - for (i = 0; i < htbl->size; i++) { - struct rhash_head *pos; + if (s_num > 1) + goto mc_list; - rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { - sk = (struct sock *)nlsk; + num--; - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) { - num++; + if (!hti) { + hti = kmalloc(sizeof(*hti), GFP_KERNEL); + if (!hti) + return -ENOMEM; + + cb->args[2] = (long)hti; + } + + if (!s_num) + rhashtable_walk_enter(&tbl->hash, hti); + + ret = rhashtable_walk_start(hti); + if (ret == -EAGAIN) + ret = 0; + if (ret) + goto stop; + + while ((nlsk = rhashtable_walk_next(hti))) { + if (IS_ERR(nlsk)) { + ret = PTR_ERR(nlsk); + if (ret == -EAGAIN) { + ret = 0; continue; } + break; + } - if (sk_diag_fill(sk, skb, req, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, - sock_i_ino(sk)) < 0) { - ret = 1; - goto done; - } + sk = (struct sock *)nlsk; - num++; + if (!net_eq(sock_net(sk), net)) + continue; + + if (sk_diag_fill(sk, skb, req, + 
NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + break; } } +stop: + rhashtable_walk_stop(hti); + if (ret) + goto done; + + rhashtable_walk_exit(hti); + cb->args[2] = 0; + num++; + +mc_list: + read_lock(&nl_table_lock); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -116,13 +148,14 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NLM_F_MULTI, sock_i_ino(sk)) < 0) { ret = 1; - goto done; + break; } num++; } + read_unlock(&nl_table_lock); + done: cb->args[0] = num; - cb->args[1] = protocol; return ret; } @@ -131,20 +164,20 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_diag_req *req; int s_num = cb->args[0]; + int err = 0; req = nlmsg_data(cb->nlh); - rcu_read_lock(); - read_lock(&nl_table_lock); - if (req->sdiag_protocol == NDIAG_PROTO_ALL) { int i; for (i = cb->args[1]; i < MAX_LINKS; i++) { - if (__netlink_diag_dump(skb, cb, i, s_num)) + err = __netlink_diag_dump(skb, cb, i, s_num); + if (err) break; s_num = 0; } + cb->args[1] = i; } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); @@ -152,13 +185,22 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } - read_unlock(&nl_table_lock); - rcu_read_unlock(); + return err < 0 ? err : skb->len; +} + +static int netlink_diag_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[2]; + + if (cb->args[0] == 1) + rhashtable_walk_exit(hti); - return skb->len; + kfree(hti); + + return 0; } static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) @@ -172,6 +214,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = netlink_diag_dump, + .done = netlink_diag_dump_done, }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else diff --git a/net/rds/ib.h b/net/rds/ib.h index 046f7508c06b..45ac8e8e58f4 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); +__printf(2, 3) void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index b2d17f0fafa8..fd0bccb2f9f9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) +__printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); #define rds_conn_path_error(cp, fmt...) 
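The netlink diag dump above becomes resumable: a heap-allocated rhashtable_iter is parked in cb->args[2] between dump callbacks and released in the new .done handler, instead of holding locks across the whole table walk. The general shape — an opaque cursor surviving across chunked calls — as a standalone sketch (the long array mirrors netlink's cb->args; everything else is illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct walk_state { int next; };        /* stand-in for rhashtable_iter */

    /* Emit up to 'budget' items per pass; return 1 while more remain. */
    static int dump_chunk(long *args, int total, int budget)
    {
            struct walk_state *st = (struct walk_state *)args[2];

            if (!st) {                      /* first pass: allocate the cursor */
                    st = calloc(1, sizeof(*st));
                    if (!st)
                            return 0;
                    args[2] = (long)st;
            }
            while (st->next < total && budget--)
                    printf("item %d\n", st->next++);
            return st->next < total;
    }

    static void dump_done(long *args)       /* mirrors netlink_diag_dump_done */
    {
            free((struct walk_state *)args[2]);
            args[2] = 0;
    }

    int main(void)
    {
            long args[3] = { 0, 0, 0 };

            while (dump_chunk(args, 5, 2))
                    ;                       /* kernel sends one skb per chunk */
            dump_done(args);
            return 0;
    }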
\ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 88effadd4b16..c7cf356b42b8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -22,6 +22,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#define CREATE_TRACE_POINTS #include "ar-internal.h" MODULE_DESCRIPTION("RxRPC network protocol"); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ff83fb1ddd47..c761124961cc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -255,6 +255,9 @@ enum rxrpc_conn_flag { RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */ + RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ + RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ + RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ }; /* @@ -265,6 +268,17 @@ enum rxrpc_conn_event { }; /* + * The connection cache state. + */ +enum rxrpc_conn_cache_state { + RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ + RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ + RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ + RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ +}; + +/* * The connection protocol state. */ enum rxrpc_conn_proto_state { @@ -276,6 +290,7 @@ enum rxrpc_conn_proto_state { RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */ + RXRPC_CONN_LOCAL_ERROR, /* Conn terminated by local error */ RXRPC_CONN__NR_STATES }; @@ -288,23 +303,33 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - spinlock_t channel_lock; + atomic_t usage; + struct rcu_head rcu; + struct list_head cache_link; + spinlock_t channel_lock; + unsigned char active_chans; /* Mask of active channels */ +#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) + struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ - u32 last_result; /* Result of last call (0/abort) */ + u8 last_type; /* Type of last packet */ + u16 last_service_id; + union { + u32 last_seq; + u32 last_abort; + }; } channels[RXRPC_MAXCALLS]; - wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ - struct rcu_head rcu; struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ struct rb_node service_node; /* Node in peer->service_conns */ }; + struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ const struct rxrpc_security *security; /* applied security module */ @@ -313,17 +338,16 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long flags; unsigned long events; - unsigned long put_time; /* Time at which last put */ + unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - atomic_t usage; + 
enum rxrpc_conn_cache_state cache_state : 8; enum rxrpc_conn_proto_state state : 8; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ int error; /* local error incurred */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ - atomic_t hi_serial; /* highest serial number received */ - atomic_t avail_chans; /* number of channels available */ + unsigned int hi_serial; /* highest serial number received */ u8 size_align; /* data size alignment (for security) */ u8 header_size; /* rxrpc + security header size */ u8 security_size; /* security header size */ @@ -341,10 +365,11 @@ enum rxrpc_call_flag { RXRPC_CALL_RCVD_LAST, /* all packets received */ RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_PROC_BUSY, /* the processor is busy */ RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ + RXRPC_CALL_IS_SERVICE, /* Call is service call */ + RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ }; /* @@ -402,6 +427,7 @@ enum rxrpc_call_state { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock *socket; /* socket responsible */ struct timer_list lifetimer; /* lifetime remaining on call */ struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ @@ -410,13 +436,14 @@ struct rxrpc_call { struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ struct list_head link; /* link in master call list */ + struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* calls awaiting acceptance */ struct rb_node sock_node; /* node in socket call tree */ struct sk_buff_head rx_queue; /* received packets */ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ - wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */ + wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long creation_jif; /* time of call creation */ @@ -432,8 +459,10 @@ struct rxrpc_call { int error_report; /* Network error (ICMP/local transport) */ int error; /* Local error incurred */ enum rxrpc_call_state state : 8; /* current state of call */ + u16 service_id; /* service ID */ + u32 call_id; /* call ID on connection */ + u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ - u8 channel; /* connection channel occupied by this call */ /* transmission-phase ACK management */ u8 acks_head; /* offset into window of first entry */ @@ -455,19 +484,13 @@ struct rxrpc_call { rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ + u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ atomic_t ackr_not_idle; /* number of packets in Rx queue */ /* received packet records, 1 bit per record */ #define 
RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; - - u8 in_clientflag; /* Copy of conn->in_clientflag */ - struct rxrpc_local *local; /* Local endpoint. */ - u32 call_id; /* call ID on connection */ - u32 cid; /* connection ID plus channel index */ - u32 epoch; /* epoch of this connection */ - u16 service_id; /* service ID */ }; /* @@ -484,6 +507,8 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) write_unlock_bh(&call->state_lock); } +#include <trace/events/rxrpc.h> + /* * af_rxrpc.c */ @@ -502,8 +527,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); +void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); void rxrpc_process_call(struct work_struct *); /* @@ -528,15 +553,32 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); void __rxrpc_put_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); +static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) +{ + return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); +} + +static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) +{ + return !rxrpc_is_service_call(call); +} + /* * conn_client.c */ +extern unsigned int rxrpc_max_client_connections; +extern unsigned int rxrpc_reap_client_connections; +extern unsigned int rxrpc_conn_idle_client_expiry; +extern unsigned int rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); -void rxrpc_unpublish_client_conn(struct rxrpc_connection *); +void rxrpc_expose_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_call *); +void rxrpc_put_client_conn(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_client_connections(void); /* * conn_event.c @@ -550,15 +592,17 @@ void rxrpc_reject_packets(struct rxrpc_local *); */ extern unsigned int rxrpc_connection_expiry; extern struct list_head rxrpc_connections; +extern struct list_head rxrpc_connection_proc_list; extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); -void __rxrpc_disconnect_call(struct rxrpc_call *); +void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_put_connection(struct rxrpc_connection *); +void rxrpc_kill_connection(struct rxrpc_connection *); +void __rxrpc_put_connection(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -582,6 +626,21 @@ struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *con return atomic_inc_not_zero(&conn->usage) ? 
conn : NULL; } +static inline void rxrpc_put_connection(struct rxrpc_connection *conn) +{ + if (!conn) + return; + + if (rxrpc_conn_is_client(conn)) { + if (atomic_dec_and_test(&conn->usage)) + rxrpc_put_client_conn(conn); + } else { + if (atomic_dec_return(&conn->usage) == 1) + __rxrpc_put_connection(conn); + } +} + + static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) { if (!rxrpc_get_connection_maybe(conn)) @@ -697,9 +756,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -static inline void rxrpc_get_peer(struct rxrpc_peer *peer) +static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { atomic_inc(&peer->usage); + return peer; } static inline @@ -747,6 +807,11 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *); * skbuff.c */ void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *); +void rxrpc_see_skb(struct sk_buff *); +void rxrpc_get_skb(struct sk_buff *); +void rxrpc_free_skb(struct sk_buff *); +void rxrpc_purge_queue(struct sk_buff_head *); /* * sysctl.c @@ -894,44 +959,6 @@ do { \ #endif /* __KDEBUGALL */ -/* - * socket buffer accounting / leak finding - */ -static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn) -{ - //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_inc(&rxrpc_n_skbs); -} - -#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__) - -static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn) -{ - //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); -} - -#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__) - -static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn) -{ - if (skb) { - CHECK_SLAB_OKAY(&skb->users); - //_net("free skb %p %s [%d]", - // skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); - kfree_skb(skb); - } -} - -#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__) - -static inline void rxrpc_purge_queue(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue((list))) != NULL) - rxrpc_free_skb(skb); -} #define rxrpc_get_call(CALL) \ do { \ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9bae21e66d65..669ac79d3b44 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -203,6 +203,7 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local) _net("incoming call skb %p", skb); + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); /* Set up a response packet header in case we need it */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e60cf65c2232..5292bcfd8816 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -25,7 +25,7 @@ * propose an ACK be sent */ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate) { unsigned long expiry; s8 prior = rxrpc_ack_priority[ack_reason]; @@ -44,8 +44,10 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers */ if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) + if (prior <= 4) { + call->ackr_skew = skew; call->ackr_serial = serial; + } if (immediate) goto cancel_timer; return; @@ -103,13 +105,13 @@ cancel_timer: * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call 
*call, u8 ack_reason, - u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate) { s8 prior = rxrpc_ack_priority[ack_reason]; if (prior > rxrpc_ack_priority[call->ackr_reason]) { spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, serial, immediate); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate); spin_unlock_bh(&call->lock); } } @@ -191,6 +193,8 @@ static void rxrpc_resend(struct rxrpc_call *call) stop = true; sp->resend_at = jiffies + 3; } else { + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); sp->resend_at = jiffies + rxrpc_resend_timeout; } @@ -376,7 +380,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) call->acks_hard++; } - wake_up(&call->tx_waitq); + wake_up(&call->waitq); } /* @@ -407,6 +411,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) skb = skb_dequeue(&call->rx_oos_queue); if (skb) { + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); _debug("drain OOS packet %d [%d]", @@ -427,6 +432,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) /* find out what the next packet is */ skb = skb_peek(&call->rx_oos_queue); + rxrpc_see_skb(skb); if (skb) call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; else @@ -576,6 +582,7 @@ process_further: if (!skb) return -EAGAIN; + rxrpc_see_skb(skb); _net("deferred skb %p", skb); sp = rxrpc_skb(skb); @@ -625,7 +632,7 @@ process_further: if (ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", latest); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - sp->hdr.serial, true); + skb->priority, sp->hdr.serial, true); } /* discard any out-of-order or duplicate ACKs */ @@ -832,11 +839,6 @@ void rxrpc_process_call(struct work_struct *work) call->debug_id, rxrpc_call_states[call->state], call->events, (jiffies - call->creation_jif) / (HZ / 10)); - if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) { - _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX"); - return; - } - if (!call->conn) goto skip_msg_init; @@ -1155,8 +1157,7 @@ skip_msg_init: goto maybe_reschedule; send_ACK_with_skew: - ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - - ntohl(ack.serial)); + ack.maxSkew = htons(call->ackr_skew); send_ACK: mtu = call->conn->params.peer->if_mtu; mtu -= call->conn->params.peer->hdrsize; @@ -1246,7 +1247,8 @@ send_message_2: case RXRPC_CALL_SERVER_ACK_REQUEST: _debug("start ACK timer"); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_serial, false); + call->ackr_skew, call->ackr_serial, + false); default: break; } @@ -1281,7 +1283,6 @@ maybe_reschedule: } error: - clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags); kfree(acks); /* because we don't want two CPUs both processing the work item for one diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ae057e0740f3..e7cbcc4a87cf 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -127,10 +127,11 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_WORK(&call->destroyer, &rxrpc_destroy_call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); + INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); - init_waitqueue_head(&call->tx_waitq); + init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); @@ -167,10 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, sock_hold(&rx->sk); call->socket = rx; 
call->rx_data_post = 1; - - call->local = rx->local; call->service_id = srx->srx_service; - call->in_clientflag = 0; _leave(" = %p", call); return call; @@ -318,11 +316,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, chan = sp->hdr.cid & RXRPC_CHANNELMASK; candidate->socket = rx; candidate->conn = conn; + candidate->peer = conn->params.peer; candidate->cid = sp->hdr.cid; candidate->call_id = sp->hdr.callNumber; - candidate->channel = chan; candidate->rx_data_post = 0; candidate->state = RXRPC_CALL_SERVER_ACCEPTING; + candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); if (conn->security_ix > 0) candidate->state = RXRPC_CALL_SERVER_SECURING; @@ -332,7 +331,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, call = rcu_dereference_protected(conn->channels[chan].call, lockdep_is_held(&conn->channel_lock)); - _debug("channel[%u] is %p", candidate->channel, call); + _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call); if (call && call->call_id == sp->hdr.callNumber) { /* already set; must've been a duplicate packet */ _debug("extant call [%d]", call->state); @@ -360,7 +359,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, call->debug_id, rxrpc_call_states[call->state]); if (call->state >= RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); } else { spin_unlock(&conn->channel_lock); kmem_cache_free(rxrpc_call_jar, candidate); @@ -387,6 +386,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); sock_hold(&rx->sk); rxrpc_get_connection(conn); + rxrpc_get_peer(call->peer); spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); @@ -397,10 +397,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock_bh(&rxrpc_call_lock); - call->local = conn->params.local; - call->epoch = conn->proto.epoch; call->service_id = conn->params.service_id; - call->in_clientflag = RXRPC_CLIENT_INITIATED; _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); @@ -569,12 +566,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) read_lock_bh(&rx->call_lock); - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); - } - /* kill the not-yet-accepted incoming calls */ list_for_each_entry(call, &rx->secureq, accept_link) { rxrpc_mark_call_released(call); @@ -584,6 +575,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_mark_call_released(call); } + /* mark all the calls as no longer wanting incoming packets */ + for (p = rb_first(&rx->calls); p; p = rb_next(p)) { + call = rb_entry(p, struct rxrpc_call, sock_node); + rxrpc_mark_call_released(call); + } + read_unlock_bh(&rx->call_lock); _leave(""); } @@ -616,6 +613,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); rxrpc_purge_queue(&call->rx_queue); + rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); } @@ -682,8 +680,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); - _enter("%p{%d,%d,%p}", - call, atomic_read(&call->usage), call->channel, call->conn); + _enter("%p{%d,%x,%p}", + call, atomic_read(&call->usage), call->cid, call->conn); 
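The hunks above drop the cached call->channel field because the channel index is always recoverable from the low bits of the cid (RXRPC_MAXCALLS is 4, so RXRPC_CHANNELMASK is 3 and RXRPC_CIDSHIFT is 2, as used elsewhere in this patch). A minimal userspace sketch of that packing; the constants are restated here for illustration rather than included from the kernel headers, and conn_id/channel are made-up values:

#include <stdint.h>
#include <stdio.h>

#define RXRPC_MAXCALLS    4
#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS - 1)  /* low two bits of the cid */
#define RXRPC_CIDSHIFT    2                     /* log2(RXRPC_MAXCALLS) */

int main(void)
{
        uint32_t conn_id = 0x12345;     /* hypothetical ID from the conn IDR */
        unsigned int channel = 3;       /* one of four per-conn call slots */
        uint32_t cid = (conn_id << RXRPC_CIDSHIFT) | channel;

        /* What the patched code computes instead of reading call->channel */
        printf("cid=%08x channel=%u conn=%05x\n",
               (unsigned)cid,
               (unsigned)(cid & RXRPC_CHANNELMASK),
               (unsigned)(cid >> RXRPC_CIDSHIFT));
        return 0;
}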
ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9e91f27b0d0f..349402b08e5a 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -7,6 +7,68 @@ * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. + * + * + * Client connections need to be cached for a little while after they've made a + * call so as to handle retransmitted DATA packets in case the server didn't + * receive the final ACK or terminating ABORT we sent it. + * + * Client connections can be in one of a number of cache states: + * + * (1) INACTIVE - The connection is not held in any list and may not have been + * exposed to the world. If it has been previously exposed, it was + * discarded from the idle list after expiring. + * + * (2) WAITING - The connection is waiting for the number of client conns to + * drop below the maximum capacity. Calls may be in progress upon it from + * when it was active and got culled. + * + * The connection is on the rxrpc_waiting_client_conns list which is kept + * in to-be-granted order. Culled conns with waiters go to the back of + * the queue just like new conns. + * + * (3) ACTIVE - The connection has at least one call in progress upon it, it + * may freely grant available channels to new calls and calls may be + * waiting on it for channels to become available. + * + * The connection is on the rxrpc_active_client_conns list which is kept + * in activation order for culling purposes. + * + * rxrpc_nr_active_client_conns is also incremented to account for it. + * + * (4) CULLED - The connection got summarily culled to try and free up + * capacity. Calls currently in progress on the connection are allowed to + * continue, but new calls will have to wait. There can be no waiters in + * this state - the conn would have to go to the WAITING state instead. + * + * (5) IDLE - The connection has no calls in progress upon it and must have + * been exposed to the world (ie. the EXPOSED flag must be set). When it + * expires, the EXPOSED flag is cleared and the connection transitions to + * the INACTIVE state. + * + * The connection is on the rxrpc_idle_client_conns list which is kept in + * order of how soon they'll expire. + * + * There are flags of relevance to the cache: + * + * (1) EXPOSED - The connection ID got exposed to the world. If this flag is + * set, an extra ref is added to the connection preventing it from being + * reaped when it has no calls outstanding. This flag is cleared and the + * ref dropped when a conn is discarded from the idle list. + * + * This allows us to move terminal call state retransmission to the + * connection and to discard the call immediately we think it is done + * with. It also gives us a chance to reuse the connection. + * + * (2) DONT_REUSE - The connection should be discarded as soon as possible and + * should not be reused. This is set when an exclusive connection is used + * or a call ID counter overflows. + * + * The caching state may only be changed if the cache lock is held. + * + * There are two idle client connection expiry durations. If the total number + * of connections is below the reap threshold, we use the normal duration; if + * it's above, we use the fast duration.
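The two expiry durations just described amount to a one-line rule. A userspace sketch of it, not kernel code: HZ is faked and the thresholds mirror the tunable defaults this patch adds at the top of conn_client.c:

#include <stdio.h>

#define HZ 100  /* stand-in; the kernel's tick rate is config-dependent */

static unsigned int rxrpc_reap_client_connections = 900;
static unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
static unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;

/* Pick the idle expiry the way the discard routine does: over the reap
 * threshold, idle conns get only a short grace period. */
static unsigned int idle_expiry(unsigned int nr_conns)
{
        return nr_conns > rxrpc_reap_client_connections ?
                rxrpc_conn_idle_client_fast_expiry :
                rxrpc_conn_idle_client_expiry;
}

int main(void)
{
        printf("850 cached conns: idle for %u jiffies before discard\n",
               idle_expiry(850));
        printf("950 cached conns: idle for %u jiffies before discard\n",
               idle_expiry(950));
        return 0;
}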
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -16,22 +78,37 @@ #include <linux/timer.h> #include "ar-internal.h" +__read_mostly unsigned int rxrpc_max_client_connections = 1000; +__read_mostly unsigned int rxrpc_reap_client_connections = 900; +__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + +static unsigned int rxrpc_nr_client_conns; +static unsigned int rxrpc_nr_active_client_conns; +static __read_mostly bool rxrpc_kill_all_client_conns; + +static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); +static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); +static LIST_HEAD(rxrpc_waiting_client_conns); +static LIST_HEAD(rxrpc_active_client_conns); +static LIST_HEAD(rxrpc_idle_client_conns); + /* * We use machine-unique IDs for our client connections. */ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_cull_active_client_conns(void); +static void rxrpc_discard_expired_client_conns(struct work_struct *); + +static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, + rxrpc_discard_expired_client_conns); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The * epoch is changed if this wraps. - * - * TODO: The IDR tree gets very expensive on memory if the connection IDs are - * widely scattered throughout the number space, so we shall need to retire - * connections that have, say, an ID more than four times the maximum number of - * client conns away from the current allocation point to try and keep the IDs - * concentrated. We will also need to retire connections from an old epoch. */ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) @@ -114,8 +191,7 @@ void rxrpc_destroy_client_conn_ids(void) } /* - * Allocate a client connection. The caller must take care to clear any - * padding bytes in *cp. + * Allocate a client connection. */ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) @@ -131,6 +207,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) return ERR_PTR(-ENOMEM); } + atomic_set(&conn->usage, 1); + if (cp->exclusive) + __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; @@ -148,7 +228,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) goto error_2; write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* We steal the caller's peer ref. */ @@ -170,32 +250,68 @@ error_0: } /* - * find a connection for a call - * - called in process context with IRQs enabled + * Determine if a connection may be reused.
*/ -int rxrpc_connect_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) +{ + int id_cursor, id, distance, limit; + + if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) + goto dont_reuse; + + if (conn->proto.epoch != rxrpc_epoch) + goto mark_dont_reuse; + + /* The IDR tree gets very expensive on memory if the connection IDs are + * widely scattered throughout the number space, so we shall want to + * kill off connections that, say, have an ID more than about four + * times the maximum number of client conns away from the current + * allocation point to try and keep the IDs concentrated. + */ + id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur); + id = conn->proto.cid >> RXRPC_CIDSHIFT; + distance = id - id_cursor; + if (distance < 0) + distance = -distance; + limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4; + if (distance > limit) + goto mark_dont_reuse; + + return true; + +mark_dont_reuse: + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); +dont_reuse: + return false; +} + +/* + * Create or find a client connection to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static int rxrpc_get_client_conn(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) { struct rxrpc_connection *conn, *candidate = NULL; struct rxrpc_local *local = cp->local; struct rb_node *p, **pp, *parent; long diff; - int chan; - - DECLARE_WAITQUEUE(myself, current); + int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); if (!cp->peer) - return -ENOMEM; + goto error; + /* If the connection is not meant to be exclusive, search the available + * connections to see if the connection we want to use already exists. + */ if (!cp->exclusive) { - /* Search for a existing client connection unless this is going - * to be a connection that's used exclusively for a single call. - */ _debug("search 1"); spin_lock(&local->client_conns_lock); p = local->client_conns.rb_node; @@ -206,39 +322,55 @@ int rxrpc_connect_call(struct rxrpc_call *call, diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { p = p->rb_left; - else if (diff > 0) + } else if (diff > 0) { p = p->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The connection needs replacing. It's better + * to effect that when we have something to + * replace it with so that we don't have to + * rebalance the tree twice. + */ + break; + } } spin_unlock(&local->client_conns_lock); } - /* We didn't find a connection or we want an exclusive one. */ - _debug("get new conn"); + /* There wasn't a connection yet or we need an exclusive connection. + * We need to create a candidate and then potentially redo the search + * in case we're racing with another thread also trying to connect on a + * shareable connection. + */ + _debug("new conn"); candidate = rxrpc_alloc_client_connection(cp, gfp); - if (!candidate) { - _leave(" = -ENOMEM"); - return -ENOMEM; + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_peer; } + /* Add the call to the new connection's waiting list in case we're + * going to have to wait for the connection to come live. 
It's our + * connection, so we want first dibs on the channel slots. We would + * normally have to take channel_lock but we do this before anyone else + * can see the connection. + */ + list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + if (cp->exclusive) { - /* Assign the call on an exclusive connection to channel 0 and - * don't add the connection to the endpoint's shareable conn - * lookup tree. - */ - _debug("exclusive chan 0"); - conn = candidate; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); - chan = 0; - goto found_channel; + call->conn = candidate; + _leave(" = 0 [exclusive %d]", candidate->debug_id); + return 0; } - /* We need to redo the search before attempting to add a new connection - * lest we race with someone else adding a conflicting instance. + /* Publish the new connection for userspace to find. We need to redo + * the search before doing this lest we race with someone else adding a + * conflicting instance. */ _debug("search 2"); spin_lock(&local->client_conns_lock); @@ -249,124 +381,672 @@ int rxrpc_connect_call(struct rxrpc_call *call, parent = *pp; conn = rb_entry(parent, struct rxrpc_connection, client_node); +#define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The old connection is from an outdated epoch. */ + _debug("replace conn"); + clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); + rb_replace_node(&conn->client_node, + &candidate->client_node, + &local->client_conns); + goto candidate_published; + } } - /* The second search also failed; simply add the new connection with - * the new call in channel 0. Note that we need to take the channel - * lock before dropping the client conn lock. - */ _debug("new conn"); - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); rb_link_node(&candidate->client_node, parent, pp); rb_insert_color(&candidate->client_node, &local->client_conns); -attached: - conn = candidate; - candidate = NULL; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); +candidate_published: + set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); + call->conn = candidate; spin_unlock(&local->client_conns_lock); - chan = 0; + _leave(" = 0 [new %d]", candidate->debug_id); + return 0; -found_channel: - _debug("found chan"); - call->conn = conn; - call->channel = chan; - call->epoch = conn->proto.epoch; - call->cid = conn->proto.cid | chan; - call->call_id = ++conn->channels[chan].call_counter; - conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); + /* We come here if we found a suitable connection already in existence. + * Discard any candidate we may have allocated, and try to get a + * channel on this one. 
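The ID-distance test in rxrpc_may_reuse_conn() above keeps the IDR dense by retiring conns whose IDs have drifted more than four conn-limits (rounded up to the IDR granularity) away from the allocation cursor. A hedged userspace sketch of just that arithmetic, assuming IDR_SIZE is 256 as in the idr.h of this era and reimplementing round_up() locally:

#include <stdbool.h>
#include <stdio.h>

#define IDR_SIZE 256    /* assumed value of 1 << IDR_BITS */
#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

static unsigned int rxrpc_max_client_connections = 1000;

static bool id_may_reuse(int id, int id_cursor)
{
        int distance = id - id_cursor;
        int limit;

        if (distance < 0)
                distance = -distance;
        limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
        return distance <= limit;       /* beyond this, mark DONT_REUSE */
}

int main(void)
{
        printf("id 100 vs cursor 200: reuse=%d\n", id_may_reuse(100, 200));
        printf("id 100 vs cursor 9000: reuse=%d\n", id_may_reuse(100, 9000));
        return 0;
}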
+ */ +found_extant_conn: + _debug("found conn"); + spin_unlock(&local->client_conns_lock); - _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id); + rxrpc_put_connection(candidate); + candidate = NULL; + spin_lock(&conn->channel_lock); + call->conn = conn; + list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); + _leave(" = 0 [extant %d]", conn->debug_id); + return 0; + +error_peer: rxrpc_put_peer(cp->peer); cp->peer = NULL; - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return 0; +error: + _leave(" = %d", ret); + return ret; +} - /* We found a potentially suitable connection already in existence. If - * we can reuse it (ie. its usage count hasn't been reduced to 0 by the - * reaper), discard any candidate we may have allocated, and try to get - * a channel on this one, otherwise we have to replace it. - */ -found_extant_conn: - _debug("found conn"); - if (!rxrpc_get_connection_maybe(conn)) { - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); - rb_replace_node(&conn->client_node, - &candidate->client_node, - &local->client_conns); - clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); - goto attached; +/* + * Activate a connection. + */ +static void rxrpc_activate_conn(struct rxrpc_connection *conn) +{ + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); +} + +/* + * Attempt to animate a connection for a new call. + * + * If it's not exclusive, the connection is in the endpoint tree, and we're in + * the conn's list of those waiting to grab a channel. There is, however, a + * limit on the number of live connections allowed at any one time, so we may + * have to wait for capacity to become available. + * + * Note that a connection on the waiting queue might *also* have active + * channels if it has been culled to make space and then re-requested by a new + * call. + */ +static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +{ + unsigned int nr_conns; + + _enter("%d,%d", conn->debug_id, conn->cache_state); + + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + goto out; + + spin_lock(&rxrpc_client_conn_cache_lock); + + nr_conns = rxrpc_nr_client_conns; + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) + rxrpc_nr_client_conns = nr_conns + 1; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_WAITING: + break; + + case RXRPC_CONN_CLIENT_INACTIVE: + case RXRPC_CONN_CLIENT_CULLED: + case RXRPC_CONN_CLIENT_IDLE: + if (nr_conns >= rxrpc_max_client_connections) + goto wait_for_capacity; + goto activate_conn; + + default: + BUG(); } - spin_unlock(&local->client_conns_lock); +out_unlock: + spin_unlock(&rxrpc_client_conn_cache_lock); +out: + _leave(" [%d]", conn->cache_state); + return; - rxrpc_put_connection(candidate); +activate_conn: + _debug("activate"); + rxrpc_activate_conn(conn); + goto out_unlock; + +wait_for_capacity: + _debug("wait"); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + goto out_unlock; +} + +/* + * Deactivate a channel. + */ +static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + + rcu_assign_pointer(chan->call, NULL); + conn->active_chans &= ~(1 << channel); +} + +/* + * Assign a channel to the call at the front of the queue and wake the call up. 
+ * We don't increment the callNumber counter until this number has been exposed + * to the world. + */ +static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_call *call = list_entry(conn->waiting_calls.next, + struct rxrpc_call, chan_wait_link); + u32 call_id = chan->call_counter + 1; + + list_del_init(&call->chan_wait_link); + conn->active_chans |= 1 << channel; + call->peer = rxrpc_get_peer(conn->params.peer); + call->cid = conn->proto.cid | channel; + call->call_id = call_id; + + _net("CONNECT call %08x:%08x as call %d on conn %d", + call->cid, call->call_id, call->debug_id, conn->debug_id); + + /* Paired with the read barrier in rxrpc_wait_for_channel(). This + * orders cid and epoch in the connection wrt to call_id without the + * need to take the channel_lock. + * + * We provisionally assign a callNumber at this point, but we don't + * confirm it until the call is about to be exposed. + * + * TODO: Pair with a barrier in the data_ready handler when that looks + * at the call ID through a connection channel. + */ + smp_wmb(); + chan->call_id = call_id; + rcu_assign_pointer(chan->call, call); + wake_up(&call->waitq); +} + +/* + * Assign channels and callNumbers to waiting calls. + */ +static void rxrpc_activate_channels(struct rxrpc_connection *conn) +{ + unsigned char mask; + + _enter("%d", conn->debug_id); + + if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || + conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + return; + + spin_lock(&conn->channel_lock); + + while (!list_empty(&conn->waiting_calls) && + (mask = ~conn->active_chans, + mask &= RXRPC_ACTIVE_CHANS_MASK, + mask != 0)) + rxrpc_activate_one_channel(conn, __ffs(mask)); + + spin_unlock(&conn->channel_lock); + _leave(""); +} + +/* + * Wait for a callNumber and a channel to be granted to a call. + */ +static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) +{ + int ret = 0; + + _enter("%d", call->debug_id); + + if (!call->call_id) { + DECLARE_WAITQUEUE(myself, current); - if (!atomic_add_unless(&conn->avail_chans, -1, 0)) { if (!gfpflags_allow_blocking(gfp)) { - rxrpc_put_connection(conn); - _leave(" = -EAGAIN"); - return -EAGAIN; + ret = -EAGAIN; + goto out; } - add_wait_queue(&conn->channel_wq, &myself); + add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (atomic_add_unless(&conn->avail_chans, -1, 0)) + if (call->call_id) break; - if (signal_pending(current)) - goto interrupted; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } schedule(); } - remove_wait_queue(&conn->channel_wq, &myself); + remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); } - /* The connection allegedly now has a free channel and we can now - * attach the call to it. - */ + /* Paired with the write barrier in rxrpc_activate_one_channel(). 
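The smp_wmb()/smp_rmb() pairing noted above guarantees that a waiter which observes the published channel also observes the call ID written before it. A userspace model of the same publish/consume ordering; C11 release/acquire atomics stand in for the kernel barriers, and a spin loop stands in for sleeping on call->waitq:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct chan {
        uint32_t call_id;               /* plain data, written first */
        _Atomic(uintptr_t) call;        /* publication point */
};

static void activate(struct chan *ch, uintptr_t call, uint32_t id)
{
        ch->call_id = id;
        /* release orders the call_id store before the pointer store,
         * like the smp_wmb() in rxrpc_activate_one_channel() */
        atomic_store_explicit(&ch->call, call, memory_order_release);
}

static uint32_t wait_side(struct chan *ch)
{
        while (!atomic_load_explicit(&ch->call, memory_order_acquire))
                ;       /* rxrpc sleeps on call->waitq instead of spinning */
        /* acquire pairs with the release, like the smp_rmb() in
         * rxrpc_wait_for_channel() */
        return ch->call_id;
}

int main(void)
{
        struct chan ch = { .call = 0 };

        activate(&ch, (uintptr_t)&ch, 42);
        printf("call_id=%u\n", (unsigned)wait_side(&ch));
        return 0;
}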
*/ + smp_rmb(); + +out: + _leave(" = %d", ret); + return ret; +} + +/* + * find a connection for a call + * - called in process context with IRQs enabled + */ +int rxrpc_connect_call(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + int ret; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + rxrpc_discard_expired_client_conns(NULL); + rxrpc_cull_active_client_conns(); + + ret = rxrpc_get_client_conn(call, cp, srx, gfp); + if (ret < 0) + return ret; + + rxrpc_animate_client_conn(call->conn); + rxrpc_activate_channels(call->conn); + + ret = rxrpc_wait_for_channel(call, gfp); + if (ret < 0) + rxrpc_disconnect_client_call(call); + + _leave(" = %d", ret); + return ret; +} + +/* + * Note that a connection is about to be exposed to the world. Once it is + * exposed, we maintain an extra ref on it that stops it from being summarily + * discarded before it's (a) had a chance to deal with retransmission and (b) + * had a chance at re-use (the per-connection security negotiation is + * expensive). + */ +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn) +{ + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + rxrpc_get_connection(conn); +} + +/* + * Note that a call, and thus a connection, is about to be exposed to the + * world. + */ +void rxrpc_expose_client_call(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; + + if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + /* Mark the call ID as being used. If the callNumber counter + * exceeds ~2 billion, we kill the connection after its + * outstanding calls have finished so that the counter doesn't + * wrap. + */ + chan->call_counter++; + if (chan->call_counter >= INT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_expose_client_conn(conn); + } +} + +/* + * Disconnect a client call. + */ +void rxrpc_disconnect_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + call->conn = NULL; + spin_lock(&conn->channel_lock); - for (chan = 0; chan < RXRPC_MAXCALLS; chan++) - if (!conn->channels[chan].call) - goto found_channel; - BUG(); + /* Calls that have never actually been assigned a channel can simply be + * discarded. If the conn didn't get used either, it will follow + * immediately unless someone else grabs it in the meantime. + */ + if (!list_empty(&call->chan_wait_link)) { + _debug("call is waiting"); + ASSERTCMP(call->call_id, ==, 0); + ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + list_del_init(&call->chan_wait_link); + + /* We must deactivate or idle the connection if it's now + * waiting for nothing. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && + list_empty(&conn->waiting_calls) && + !conn->active_chans) + goto idle_connection; + goto out; + } + + ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + ASSERTCMP(atomic_read(&conn->usage), >=, 2); + + /* If a client call was exposed to the world, we save the result for + * retransmission. + * + * We use a barrier here so that the call number and abort code can be + * read without needing to take a lock. + * + * TODO: Make the incoming packet handler check this and handle + * terminal retransmission without requiring access to the call. 
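The counter handling in rxrpc_expose_client_call() above only commits a callNumber once the call is exposed, and retires the connection before the 32-bit counter can wrap. A small sketch of that guard under the same INT_MAX threshold; the struct here is a cut-down stand-in, not the kernel's struct rxrpc_channel:

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct channel { uint32_t call_counter; };

static void expose_call(struct channel *chan, bool *dont_reuse)
{
        chan->call_counter++;
        if (chan->call_counter >= INT_MAX)
                *dont_reuse = true;     /* conn dies once its calls finish */
}

int main(void)
{
        struct channel chan = { .call_counter = INT_MAX - 1 };
        bool dont_reuse = false;

        expose_call(&chan, &dont_reuse);
        printf("counter=%u dont_reuse=%d\n",
               (unsigned)chan.call_counter, dont_reuse);
        return 0;
}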
+ */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + _debug("exposed %u,%u", call->call_id, call->local_abort); + __rxrpc_disconnect_call(conn, call); + } + + /* See if we can pass the channel directly to another call. */ + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && + !list_empty(&conn->waiting_calls)) { + _debug("pass chan"); + rxrpc_activate_one_channel(conn, channel); + goto out_2; + } + + /* Things are more complex and we need the cache lock. We might be + * able to simply idle the conn or it might now be lurking on the wait + * list. It might even get moved back to the active list whilst we're + * waiting for the lock. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + if (list_empty(&conn->waiting_calls)) { + rxrpc_deactivate_one_channel(conn, channel); + if (!conn->active_chans) { + rxrpc_nr_active_client_conns--; + goto idle_connection; + } + goto out; + } + + _debug("pass chan 2"); + rxrpc_activate_one_channel(conn, channel); + goto out; + + case RXRPC_CONN_CLIENT_CULLED: + rxrpc_deactivate_one_channel(conn, channel); + ASSERT(list_empty(&conn->waiting_calls)); + if (!conn->active_chans) + goto idle_connection; + goto out; + + case RXRPC_CONN_CLIENT_WAITING: + rxrpc_deactivate_one_channel(conn, channel); + goto out; + + default: + BUG(); + } -interrupted: - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); +out: + spin_unlock(&rxrpc_client_conn_cache_lock); +out_2: + spin_unlock(&conn->channel_lock); rxrpc_put_connection(conn); - rxrpc_put_peer(cp->peer); - cp->peer = NULL; - _leave(" = -ERESTARTSYS"); - return -ERESTARTSYS; + _leave(""); + return; + +idle_connection: + /* As no channels remain active, the connection gets deactivated + * immediately or moved to the idle list for a short while. + */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + _debug("make idle"); + conn->idle_timestamp = jiffies; + conn->cache_state = RXRPC_CONN_CLIENT_IDLE; + list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); + if (rxrpc_idle_client_conns.next == &conn->cache_link && + !rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + rxrpc_conn_idle_client_expiry); + } else { + _debug("make inactive"); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + } + goto out; } /* - * Remove a client connection from the local endpoint's tree, thereby removing - * it as a target for reuse for new client calls. + * Clean up a dead client connection. 
*/ -void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn) +static struct rxrpc_connection * +rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { + struct rxrpc_connection *next; struct rxrpc_local *local = conn->params.local; + unsigned int nr_conns; - spin_lock(&local->client_conns_lock); - if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) - rb_erase(&conn->client_node, &local->client_conns); - spin_unlock(&local->client_conns_lock); + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { + spin_lock(&local->client_conns_lock); + if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, + &conn->flags)) + rb_erase(&conn->client_node, &local->client_conns); + spin_unlock(&local->client_conns_lock); + } rxrpc_put_client_connection_id(conn); + + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); + + if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags)) + return NULL; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; + + next = NULL; + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } + + spin_unlock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_connection(conn); + + if (next) + rxrpc_activate_channels(next); + + /* We need to get rid of the temporary ref we took upon next, but we + * can't call rxrpc_put_connection() recursively. + */ + return next; +} + +/* + * Clean up dead client connections. + */ +void rxrpc_put_client_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_connection *next; + + do { + _enter("%p{u=%d,d=%d}", + conn, atomic_read(&conn->usage), conn->debug_id); + + next = rxrpc_put_one_client_conn(conn); + + if (!next) + break; + conn = next; + } while (atomic_dec_and_test(&conn->usage)); + + _leave(""); +} + +/* + * Kill the longest-active client connections to make room for new ones. + */ +static void rxrpc_cull_active_client_conns(void) +{ + struct rxrpc_connection *conn; + unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_active, limit; + + _enter(""); + + ASSERTCMP(nr_conns, >=, 0); + if (nr_conns < rxrpc_max_client_connections) { + _leave(" [ok]"); + return; + } + limit = rxrpc_reap_client_connections; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_active = rxrpc_nr_active_client_conns; + + while (nr_active > limit) { + ASSERT(!list_empty(&rxrpc_active_client_conns)); + conn = list_entry(rxrpc_active_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + + if (list_empty(&conn->waiting_calls)) { + conn->cache_state = RXRPC_CONN_CLIENT_CULLED; + list_del_init(&conn->cache_link); + } else { + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, + &rxrpc_waiting_client_conns); + } + + nr_active--; + } + + rxrpc_nr_active_client_conns = nr_active; + spin_unlock(&rxrpc_client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + _leave(" [culled]"); +} + +/* + * Discard expired client connections from the idle list. Each conn in the + * idle list has been exposed and holds an extra ref because of that. + * + * This may be called from conn setup or from a work item so cannot be + * considered non-reentrant.
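rxrpc_put_one_client_conn() above may activate a waiting connection and hand back a temporary ref on it, which is why rxrpc_put_client_conn() loops rather than recursing. A generic userspace sketch of that iterate-don't-recurse put pattern, with plain ints standing in for atomics:

#include <stdio.h>

struct obj {
        int refs;
        struct obj *next;       /* successor that cleanup takes a ref on */
};

static struct obj *put_one(struct obj *o)
{
        struct obj *next = o->next;

        printf("destroying obj with %d refs left\n", o->refs);
        /* destruction may have activated 'next' and taken a ref on it */
        return next;
}

static void put_obj(struct obj *o)
{
        do {
                struct obj *next = put_one(o);

                if (!next)
                        break;
                o = next;
        } while (--o->refs == 0);       /* models atomic_dec_and_test() */
}

int main(void)
{
        struct obj c = { .refs = 1, .next = NULL };
        struct obj b = { .refs = 1, .next = &c };
        struct obj a = { .refs = 0, .next = &b };       /* already hit zero */

        put_obj(&a);    /* tears down a, then b, then c, without recursion */
        return 0;
}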
+ */ +static void rxrpc_discard_expired_client_conns(struct work_struct *work) +{ + struct rxrpc_connection *conn; + unsigned long expiry, conn_expires_at, now; + unsigned int nr_conns; + bool did_discard = false; + + _enter("%c", work ? 'w' : 'n'); + + if (list_empty(&rxrpc_idle_client_conns)) { + _leave(" [empty]"); + return; + } + + /* Don't double up on the discarding */ + if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + _leave(" [already]"); + return; + } + + /* We keep an estimate of what the number of conns ought to be after + * we've discarded some so that we don't overdo the discarding. + */ + nr_conns = rxrpc_nr_client_conns; + +next: + spin_lock(&rxrpc_client_conn_cache_lock); + + if (list_empty(&rxrpc_idle_client_conns)) + goto out; + + conn = list_entry(rxrpc_idle_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); + + if (!rxrpc_kill_all_client_conns) { + /* If the number of connections is over the reap limit, we + * expedite discard by reducing the expiry timeout. We must, + * however, have at least a short grace period to be able to do + * final-ACK or ABORT retransmission. + */ + expiry = rxrpc_conn_idle_client_expiry; + if (nr_conns > rxrpc_reap_client_connections) + expiry = rxrpc_conn_idle_client_fast_expiry; + + conn_expires_at = conn->idle_timestamp + expiry; + + now = READ_ONCE(jiffies); + if (time_after(conn_expires_at, now)) + goto not_yet_expired; + } + + _debug("discard conn %d", conn->debug_id); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + + spin_unlock(&rxrpc_client_conn_cache_lock); + + /* When we cleared the EXPOSED flag, we took on responsibility for the + * reference that that had on the usage count. We deal with that here. + * If someone re-sets the flag and re-gets the ref, that's fine. + */ + rxrpc_put_connection(conn); + did_discard = true; + nr_conns--; + goto next; + +not_yet_expired: + /* The connection at the front of the queue hasn't yet expired, so + * schedule the work item for that point if we discarded something. + * + * We don't worry if the work item is already scheduled - it can look + * after rescheduling itself at a later time. We could cancel it, but + * then things get messier. + */ + _debug("not yet"); + if (!rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + conn_expires_at - now); + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxrpc_client_conn_discard_mutex); + _leave(""); +} + +/* + * Preemptively destroy all the client connection records rather than waiting + * for them to time out + */ +void __exit rxrpc_destroy_all_client_connections(void) +{ + _enter(""); + + spin_lock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_all_client_conns = true; + spin_unlock(&rxrpc_client_conn_cache_lock); + + cancel_delayed_work(&rxrpc_client_conn_reap); + + if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + _debug("destroy: queue failed"); + + _leave(""); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index cee0f35bc1cf..6296374df840 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -25,6 +25,114 @@ #include "ar-internal.h" /* + * Retransmit terminal ACK or ABORT of the previous call. 
+ */ +static void rxrpc_conn_retransmit(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct msghdr msg; + struct kvec iov; + struct { + struct rxrpc_wire_header whdr; + union { + struct { + __be32 code; + } abort; + struct { + struct rxrpc_ackpacket ack; + u8 padding[3]; + struct rxrpc_ackinfo info; + }; + }; + } __attribute__((packed)) pkt; + size_t len; + u32 serial, mtu, call_id; + + _enter("%d", conn->debug_id); + + chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + + /* If the last call got moved on whilst we were waiting to run, just + * ignore this packet. + */ + call_id = READ_ONCE(chan->last_call); + /* Sync with __rxrpc_disconnect_call() */ + smp_rmb(); + if (call_id != sp->hdr.callNumber) + return; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(sp->hdr.epoch); + pkt.whdr.cid = htonl(sp->hdr.cid); + pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.seq = 0; + pkt.whdr.type = chan->last_type; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = conn->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(chan->last_service_id); + + len = sizeof(pkt.whdr); + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + pkt.abort.code = htonl(chan->last_abort); + len += sizeof(pkt.abort); + break; + + case RXRPC_PACKET_TYPE_ACK: + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + pkt.ack.bufferSpace = 0; + pkt.ack.maxSkew = htons(skb->priority); + pkt.ack.firstPacket = htonl(chan->last_seq); + pkt.ack.previousPacket = htonl(chan->last_seq - 1); + pkt.ack.serial = htonl(sp->hdr.serial); + pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.nAcks = 0; + pkt.info.rxMTU = htonl(rxrpc_rx_mtu); + pkt.info.maxMTU = htonl(mtu); + pkt.info.rwind = htonl(rxrpc_rx_window_size); + pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + len += sizeof(pkt.ack) + sizeof(pkt.info); + break; + } + + /* Resync with __rxrpc_disconnect_call() and check that the last call + * didn't get advanced whilst we were filling out the packets. 
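rxrpc_conn_retransmit() above builds the whole retransmission in one packed on-stack struct so a single kvec covers the wire header plus the ACK or ABORT body with no padding between them. An abbreviated userspace illustration of that layout; the field set is cut down from the real struct rxrpc_wire_header, and the type value 4 for ABORT is taken from the rx wire protocol:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct mini_pkt {
        struct {
                uint32_t cid;
                uint32_t call_number;
                uint8_t  type;          /* e.g. ABORT */
        } __attribute__((packed)) whdr;
        uint32_t abort_code;
} __attribute__((packed));

int main(void)
{
        struct mini_pkt pkt = {
                .whdr.cid         = htonl(0x12345678),
                .whdr.call_number = htonl(7),
                .whdr.type        = 4,  /* RXRPC_PACKET_TYPE_ABORT */
                .abort_code       = htonl(17),
        };
        const unsigned char *p = (const void *)&pkt;

        /* sizeof == 13: packing removed the padding a kvec must not carry */
        printf("len=%zu first bytes: %02x %02x %02x %02x\n",
               sizeof(pkt), p[0], p[1], p[2], p[3]);
        return 0;
}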
+ */ + smp_rmb(); + if (READ_ONCE(chan->last_call) != call_id) + return; + + iov.iov_base = &pkt; + iov.iov_len = len; + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + break; + case RXRPC_PACKET_TYPE_ACK: + _proto("Tx ACK %%%u [re]", serial); + break; + } + + kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + _leave(""); + return; +} + +/* * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, @@ -166,6 +274,12 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit(conn, skb); + rxrpc_free_skb(skb); + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) return -EPROTO; @@ -277,6 +391,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { + rxrpc_see_skb(skb); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -365,6 +480,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); switch (sa.sa.sa_family) { case AF_INET: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 896d84493a05..5b45b6c367e7 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -1,6 +1,6 @@ -/* RxRPC virtual connection handler +/* RxRPC virtual connection handler, common bits. * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -15,8 +15,6 @@ #include <linux/slab.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <net/sock.h> -#include <net/af_rxrpc.h> #include "ar-internal.h" /* @@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); +LIST_HEAD(rxrpc_connection_proc_list); DEFINE_RWLOCK(rxrpc_connection_lock); static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); +static void rxrpc_destroy_connection(struct rcu_head *); + /* * allocate a new connection */ @@ -41,21 +42,19 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { + INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); - init_waitqueue_head(&conn->channel_wq); + INIT_LIST_HEAD(&conn->waiting_calls); INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); - /* We maintain an extra ref on the connection whilst it is - * on the rxrpc_connections list. 
- */ - atomic_set(&conn->usage, 2); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS); conn->size_align = 4; conn->header_size = sizeof(struct rxrpc_wire_header); + conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); @@ -153,25 +152,32 @@ not_found: * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. */ -void __rxrpc_disconnect_call(struct rxrpc_call *call) +void __rxrpc_disconnect_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[call->channel]; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; - _enter("%d,%d", conn->debug_id, call->channel); + _enter("%d,%x", conn->debug_id, call->cid); if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ - chan->last_result = call->local_abort; + chan->last_service_id = call->service_id; + if (call->local_abort) { + chan->last_abort = call->local_abort; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + } else { + chan->last_seq = call->rx_data_eaten; + chan->last_type = RXRPC_PACKET_TYPE_ACK; + } + /* Sync with rxrpc_conn_retransmit(). */ smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; rcu_assign_pointer(chan->call, NULL); - atomic_inc(&conn->avail_chans); - wake_up(&conn->channel_wq); } _leave(""); @@ -185,34 +191,52 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(call); + spin_lock(&conn->channel_lock); - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); call->conn = NULL; + conn->idle_timestamp = jiffies; rxrpc_put_connection(conn); } /* - * release a virtual connection + * Kill off a connection. */ -void rxrpc_put_connection(struct rxrpc_connection *conn) +void rxrpc_kill_connection(struct rxrpc_connection *conn) { - if (!conn) - return; + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); + write_lock(&rxrpc_connection_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); - ASSERTCMP(atomic_read(&conn->usage), >, 1); + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); - conn->put_time = ktime_get_seconds(); - if (atomic_dec_return(&conn->usage) == 1) { - _debug("zombie"); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); - } + /* Leave final destruction to RCU. The connection processor work item + * must carry a ref on the connection to prevent us getting here whilst + * it is queued or running. 
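
The smp_wmb() in __rxrpc_disconnect_call() pairs with the smp_rmb() in rxrpc_conn_retransmit(): the cached terminal state (last_type plus last_abort or last_seq) must be visible before last_call advances, and the reader re-checks last_call after consuming that state. A rough userspace analogue using C11 release/acquire atomics — all names here are invented for the sketch:

#include <stdatomic.h>
#include <stdint.h>

struct chan_model {
	uint32_t last_abort;		/* terminal state, plain fields */
	uint8_t last_type;
	_Atomic uint32_t last_call;	/* publication marker */
};

/* Writer: fill in terminal state, then advance last_call (the smp_wmb()
 * analogue is the release store).
 */
static void publish(struct chan_model *c, uint32_t call_id, uint32_t abort_code)
{
	c->last_abort = abort_code;
	c->last_type = 4;		/* "abort", for the sketch */
	atomic_store_explicit(&c->last_call, call_id, memory_order_release);
}

/* Reader: sample last_call, use the state, then re-check (the smp_rmb()
 * analogue is the acquire load).
 */
static int retransmit_for(struct chan_model *c, uint32_t pkt_call_id,
			  uint32_t *abort_out)
{
	if (atomic_load_explicit(&c->last_call, memory_order_acquire) !=
	    pkt_call_id)
		return 0;		/* channel moved on: ignore packet */
	*abort_out = c->last_abort;	/* safe: published before last_call */
	if (atomic_load_explicit(&c->last_call, memory_order_acquire) !=
	    pkt_call_id)
		return 0;		/* advanced mid-build: drop it */
	return 1;
}

int main(void)
{
	struct chan_model c = { 0 };
	uint32_t code;

	publish(&c, 7, 17);
	return retransmit_for(&c, 7, &code) ? 0 : 1;
}
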
+ */ + call_rcu(&conn->rcu, rxrpc_destroy_connection); +} - _leave(""); +/* + * release a virtual connection + */ +void __rxrpc_put_connection(struct rxrpc_connection *conn) +{ + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* @@ -242,19 +266,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) } /* - * reap dead connections + * reap dead service connections */ static void rxrpc_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; - unsigned long reap_older_than, earliest, put_time, now; + unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); - now = ktime_get_seconds(); - reap_older_than = now - rxrpc_connection_expiry; + now = jiffies; + reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; write_lock(&rxrpc_connection_lock); @@ -263,10 +287,14 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (likely(atomic_read(&conn->usage) > 1)) continue; - put_time = READ_ONCE(conn->put_time); - if (time_after(put_time, reap_older_than)) { - if (time_before(put_time, earliest)) - earliest = put_time; + idle_timestamp = READ_ONCE(conn->idle_timestamp); + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)reap_older_than - (long)idle_timestamp); + + if (time_after(idle_timestamp, reap_older_than)) { + if (time_before(idle_timestamp, earliest)) + earliest = idle_timestamp; continue; } @@ -277,7 +305,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) continue; if (rxrpc_conn_is_client(conn)) - rxrpc_unpublish_client_conn(conn); + BUG(); else rxrpc_unpublish_service_conn(conn); @@ -287,9 +315,9 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); - ASSERTCMP(earliest, >, now); + ASSERT(time_after(earliest, now)); rxrpc_queue_delayed_work(&rxrpc_connection_reap, - (earliest - now) * HZ); + earliest - now); } while (!list_empty(&graveyard)) { @@ -298,16 +326,15 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->usage), ==, 0); - skb_queue_purge(&conn->rx_queue); - call_rcu(&conn->rcu, rxrpc_destroy_connection); + rxrpc_kill_connection(conn); } _leave(""); } /* - * preemptively destroy all the connection records rather than waiting for them - * to time out + * preemptively destroy all the service connection records rather than + * waiting for them to time out */ void __exit rxrpc_destroy_all_connections(void) { @@ -316,6 +343,8 @@ void __exit rxrpc_destroy_all_connections(void) _enter(""); + rxrpc_destroy_all_client_connections(); + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -330,6 +359,8 @@ void __exit rxrpc_destroy_all_connections(void) write_unlock(&rxrpc_connection_lock); BUG_ON(leak); + ASSERT(list_empty(&rxrpc_connection_proc_list)); + /* Make sure the local and peer records pinned by any dying connections * are released. */ diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index fd9027ccba8f..316a92107fee 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -185,8 +185,14 @@ struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, rxrpc_get_local(local); + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. 
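
The reaper above now measures idleness in jiffies via time_after() rather than wall-clock seconds, and reschedules itself with a raw tick delta. time_after() stays correct across counter wrap because it reduces to a signed subtraction; here is a standalone sketch of that idiom (the helper is redefined locally, it is not the kernel macro):

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a is after b", the same trick as the kernel's time_after(). */
static int time_after_u32(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	uint32_t now = 0xfffffff0u;		/* just before wrap */
	uint32_t idle = now - 100;		/* went idle 100 ticks ago */
	uint32_t expiry = 1000;
	uint32_t reap_older_than = now - expiry;

	/* Not yet expired: idle timestamp is after the reap threshold. */
	printf("keep conn: %d\n", time_after_u32(idle, reap_older_than));

	now += 2000;				/* counter has wrapped... */
	reap_older_than = now - expiry;
	/* ...but the signed comparison still says the conn is stale. */
	printf("keep conn: %d\n", time_after_u32(idle, reap_older_than));
	return 0;
}
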
+ */ + atomic_set(&conn->usage, 2); + write_lock(&rxrpc_connection_lock); list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* Make the connection a target for incoming packets. */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 70bb77818dea..5e683dd21ab9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -125,6 +125,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, bool terminal; int ret, ackbit, ack; u32 serial; + u16 skew; u8 flags; _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); @@ -133,6 +134,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, ASSERTCMP(sp->call, ==, NULL); flags = sp->hdr.flags; serial = sp->hdr.serial; + skew = skb->priority; spin_lock(&call->lock); @@ -231,7 +233,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, spin_unlock(&call->lock); atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false); _leave(" = 0 [posted]"); return 0; @@ -244,7 +246,7 @@ out: discard_and_ack: _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, serial, true); + __rxrpc_propose_ACK(call, ack, skew, serial, true); discard: spin_unlock(&call->lock); rxrpc_free_skb(skb); @@ -252,7 +254,7 @@ discard: return 0; enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, serial, true); + __rxrpc_propose_ACK(call, ack, skew, serial, true); enqueue_packet: _net("defer skb %p", skb); spin_unlock(&call->lock); @@ -304,7 +306,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); __be32 wtmp; - u32 hi_serial, abort_code; + u32 abort_code; _enter("%p,%p", call, skb); @@ -321,18 +323,12 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) } #endif - /* track the latest serial number on this connection for ACK packet - * information */ - hi_serial = atomic_read(&call->conn->hi_serial); - while (sp->hdr.serial > hi_serial) - hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, - sp->hdr.serial); - /* request ACK generation for any ACK or DATA packet that requests * it */ if (sp->hdr.flags & RXRPC_REQUEST_ACK) { _proto("ACK Requested on %%%u", sp->hdr.serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + skb->priority, sp->hdr.serial, false); } switch (sp->hdr.type) { @@ -570,7 +566,8 @@ done: /* * post connection-level events to the connection - * - this includes challenges, responses and some aborts + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. 
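
The deleted hi_serial tracking above used an atomic compare-exchange loop so that any context could push a running maximum; the rewrite funnels the update through one path in rxrpc_data_ready(), where a plain store suffices. For reference, the retired pattern corresponds to something like this in C11 atomics (a sketch, not the kernel code):

#include <stdatomic.h>
#include <stdint.h>

/* Retired pattern: lock-free running maximum via compare-exchange. */
static void note_serial(_Atomic uint32_t *hi_serial, uint32_t serial)
{
	uint32_t cur = atomic_load(hi_serial);

	while (serial > cur) {
		/* On failure, cur is reloaded and the test repeats. */
		if (atomic_compare_exchange_weak(hi_serial, &cur, serial))
			break;
	}
}

int main(void)
{
	_Atomic uint32_t hi = 5;

	note_serial(&hi, 3);	/* no-op: not a new maximum */
	note_serial(&hi, 9);	/* hi becomes 9 */
	return atomic_load(&hi) == 9 ? 0 : 1;
}
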
*/ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) @@ -637,7 +634,7 @@ void rxrpc_data_ready(struct sock *sk) struct rxrpc_skb_priv *sp; struct rxrpc_local *local = sk->sk_user_data; struct sk_buff *skb; - int ret; + int ret, skew; _enter("%p", sk); @@ -700,25 +697,64 @@ void rxrpc_data_ready(struct sock *sk) rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) + if (!conn) { + skb->priority = 0; goto cant_route_call; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + skb->priority = 0; + } else { + skew = -skew; + skb->priority = min(skew, 65535); + } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; } else { /* Call-bound packets are routed by connection channel. */ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call = rcu_dereference(chan->call); + struct rxrpc_call *call; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + goto discard_unlock; + + if (sp->hdr.callNumber == chan->last_call) { + /* For the previous service call, if completed + * successfully, we discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + (chan->last_type == RXRPC_PACKET_TYPE_ACK || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) + goto discard_unlock; + + /* But otherwise we need to retransmit the final packet + * from data cached in the connection record. + */ + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + call = rcu_dereference(chan->call); if (!call || atomic_read(&call->usage) == 0) goto cant_route_call; rxrpc_post_packet_to_call(call, skb); + goto out_unlock; } +discard_unlock: + rxrpc_free_skb(skb); +out_unlock: rcu_read_unlock(); out: return; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 31a3f86ef2f6..bcc6593b4cdb 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -93,6 +93,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_see_skb(skb); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f4bda06b7d2d..8a9917cba6fe 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -218,11 +218,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = 0; } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; - } else if (!call->in_clientflag && + } else if (rxrpc_is_client_call(call) && call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { /* request phase complete for this client call */ ret = -EPROTO; - } else if (call->in_clientflag && + } else if (rxrpc_is_service_call(call) && call->state != RXRPC_CALL_SERVER_ACK_REQUEST && call->state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Reply phase not begun or not complete for service call. 
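
rxrpc_data_ready() above derives the skew as a signed difference between the packet's serial and the highest serial seen on the connection, clamping it to 65535 so it fits the 16-bit maxSkew ACK field it is later carried toward via skb->priority. A small standalone model of that computation:

#include <stdint.h>
#include <stdio.h>

/* How far behind the highest-seen serial is this packet? Clamped to what
 * fits in a 16-bit field; a new high-water mark yields a skew of zero.
 */
static uint32_t serial_skew(uint32_t *hi_serial, uint32_t serial)
{
	int skew = (int)serial - (int)*hi_serial;

	if (skew >= 0) {
		if (skew > 0)
			*hi_serial = serial;	/* new high-water mark */
		return 0;
	}
	skew = -skew;
	return skew < 65535 ? skew : 65535;
}

int main(void)
{
	uint32_t hi = 0;

	printf("%u\n", serial_skew(&hi, 5));	/* 0, hi becomes 5 */
	printf("%u\n", serial_skew(&hi, 3));	/* 2: packet is 2 behind */
	printf("%u\n", serial_skew(&hi, 5));	/* 0: equal, hi unchanged */
	return 0;
}
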
*/ @@ -390,7 +390,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, call->acks_winsz), *timeo); - add_wait_queue(&call->tx_waitq, &myself); + add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -408,7 +408,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, lock_sock(&rx->sk); } - remove_wait_queue(&call->tx_waitq, &myself); + remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); _leave(" = %d", ret); return ret; @@ -482,6 +482,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (try_to_del_timer_sync(&call->ack_timer) >= 0) { /* the packet may be freed by rxrpc_process_call() before this * returns */ + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); ret = rxrpc_send_data_packet(call->conn, skb); _net("sent skb %p", skb); } else { @@ -548,6 +550,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; + rxrpc_see_skb(skb); copied = 0; do { diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ced5f07444e5..060fb4892c39 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -46,7 +46,9 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { - struct rxrpc_connection *conn; + struct rxrpc_local *local; + struct rxrpc_sock *rx; + struct rxrpc_peer *peer; struct rxrpc_call *call; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; @@ -60,15 +62,24 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - sprintf(lbuff, "%pI4:%u", - &call->local->srx.transport.sin.sin_addr, - ntohs(call->local->srx.transport.sin.sin_port)); + rx = READ_ONCE(call->socket); + if (rx) { + local = READ_ONCE(rx->local); + if (local) + sprintf(lbuff, "%pI4:%u", + &local->srx.transport.sin.sin_addr, + ntohs(local->srx.transport.sin.sin_port)); + else + strcpy(lbuff, "no_local"); + } else { + strcpy(lbuff, "no_socket"); + } - conn = call->conn; - if (conn) + peer = call->peer; + if (peer) sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + &peer->srx.transport.sin.sin_addr, + ntohs(peer->srx.transport.sin.sin_port)); else strcpy(rbuff, "no_connection"); @@ -80,7 +91,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->service_id, call->cid, call->call_id, - call->in_clientflag ? "Svc" : "Clt", + rxrpc_is_service_call(call) ? 
"Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], call->remote_abort ?: call->local_abort, @@ -115,13 +126,13 @@ const struct file_operations rxrpc_call_seq_fops = { static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connections, *_pos); + return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connections, pos); + return seq_list_next(v, &rxrpc_connection_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -134,7 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) struct rxrpc_connection *conn; char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; - if (v == &rxrpc_connections) { + if (v == &rxrpc_connection_proc_list) { seq_puts(seq, "Proto Local Remote " " SvID ConnID End Use State Key " @@ -143,7 +154,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) return 0; } - conn = list_entry(v, struct rxrpc_connection, link); + conn = list_entry(v, struct rxrpc_connection, proc_link); sprintf(lbuff, "%pI4:%u", &conn->params.local->srx.transport.sin.sin_addr, @@ -165,7 +176,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), - atomic_read(&conn->hi_serial)); + conn->hi_serial); return 0; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9ed66d533002..b964c2d49a88 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -111,6 +111,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } peek_next_packet: + rxrpc_see_skb(skb); sp = rxrpc_skb(skb); call = sp->call; ASSERT(call != NULL); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 63afa9e9cc08..89f475febfd7 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -275,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; call->crypto_buf[0] = htonl(sp->hdr.callNumber); call->crypto_buf[1] = htonl(x); @@ -507,7 +507,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 06c51d4b622d..fbd8c74d9505 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -53,9 +53,9 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call) /* * drop the bottom ACK off of the call ACK window and advance the window */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, - struct rxrpc_skb_priv *sp) +static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); int loop; u32 seq; @@ -91,8 +91,8 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, * its Tx bufferage. 
*/ _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - false); + __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, + skb->priority, sp->hdr.serial, false); } spin_unlock_bh(&call->lock); @@ -125,7 +125,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, sp); + rxrpc_hard_ACK_data(call, skb); } EXPORT_SYMBOL(rxrpc_kernel_data_consumed); @@ -163,3 +163,65 @@ void rxrpc_kernel_free_skb(struct sk_buff *skb) rxrpc_free_skb(skb); } EXPORT_SYMBOL(rxrpc_kernel_free_skb); + +/* + * Note the existence of a new-to-us socket buffer (allocated or dequeued). + */ +void rxrpc_new_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here); +} + +/* + * Note the re-emergence of a socket buffer from a queue or buffer. + */ +void rxrpc_see_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n = atomic_read(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here); + } +} + +/* + * Note the addition of a ref on a socket buffer. + */ +void rxrpc_get_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here); + skb_get(skb); +} + +/* + * Note the destruction of a socket buffer. + */ +void rxrpc_free_skb(struct sk_buff *skb) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } +} + +/* + * Clear a queue of socket buffers. 
+ */ +void rxrpc_purge_queue(struct sk_buff_head *list) +{ + const void *here = __builtin_return_address(0); + struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { + int n = atomic_dec_return(&rxrpc_n_skbs); + trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } +} diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 03ad08774d4e..dc380af8a81e 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -62,6 +62,22 @@ static struct ctl_table rxrpc_sysctl_table[] = { .proc_handler = proc_dointvec_ms_jiffies, .extra1 = (void *)&one, }, + { + .procname = "idle_conn_expiry", + .data = &rxrpc_conn_idle_client_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_fast_expiry", + .data = &rxrpc_conn_idle_client_fast_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, /* Values measured in seconds but used in jiffies */ { @@ -81,17 +97,24 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one, }, - /* Values measured in seconds */ + /* Non-time values */ + { + .procname = "max_client_conns", + .data = &rxrpc_max_client_connections, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&rxrpc_reap_client_connections, + }, { - .procname = "connection_expiry", - .data = &rxrpc_connection_expiry, + .procname = "reap_client_conns", + .data = &rxrpc_reap_client_connections, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&one, + .extra2 = (void *)&rxrpc_max_client_connections, }, - - /* Non-time values */ { .procname = "max_backlog", .data = &rxrpc_max_backlog, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 691409de3e1a..59a8d3150ae2 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid); + err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | + (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 }, }; static int tcf_vlan_init(struct net *net, struct nlattr *nla, @@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; + u8 push_prio = 0; bool exists = false; int ret = 0, err; @@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } else { push_proto = htons(ETH_P_8021Q); } + + if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY]) + push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]); break; default: if (exists) @@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v->tcfv_action = action; v->tcfv_push_vid = push_vid; + v->tcfv_push_prio = push_prio; v->tcfv_push_proto = push_proto; v->tcf_action = parm->action; @@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (v->tcfv_action == TCA_VLAN_ACT_PUSH && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || 
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto))) + v->tcfv_push_proto) || + (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, + v->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..eb219b78cd49 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); if (err < 0) @@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + if (err < 0) + goto errout; + err = -EINVAL; if (handle) { fnew->handle = handle; @@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c3002c2c68bb..4742f415ee5b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -311,17 +311,19 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (ret < 0) return ret; + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - tcf_exts_destroy(&exts); - return -EINVAL; + ret = -EINVAL; + goto errout; } have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; @@ -331,10 +333,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, ret = is_bpf ? 
cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); - if (ret < 0) { - tcf_exts_destroy(&exts); - return ret; - } + if (ret < 0) + goto errout; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); @@ -343,6 +343,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); return 0; + +errout: + tcf_exts_destroy(&exts); + return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -388,7 +392,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!prog) return -ENOBUFS; - tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + if (ret < 0) + goto errout; if (oldprog) { if (handle && oldprog->handle != handle) { @@ -420,9 +426,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) prog; return 0; + errout: + tcf_exts_destroy(&prog->exts); kfree(prog); - return ret; } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..85233c470035 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (!new) return -ENOBUFS; - tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + if (err < 0) + goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], @@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) { + tcf_exts_destroy(&e); + goto errout; + } err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); if (err < 0) { @@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, call_rcu(&head->rcu, cls_cgroup_destroy_rcu); return 0; errout: + tcf_exts_destroy(&new->exts); kfree(new); return err; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..2c1ae549edbf 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -418,10 +418,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err1; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) - return err; + goto err1; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); if (err < 0) @@ -432,13 +434,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err3; fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err2; + goto err3; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -458,31 +462,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (mode == FLOW_MODE_HASH) 
perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err2; + goto err3; if (!tb[TCA_FLOW_KEYS]) - goto err2; + goto err3; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } @@ -542,6 +546,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, call_rcu(&fold->rcu, flow_destroy_filter); return 0; +err3: + tcf_exts_destroy(&fnew->exts); err2: tcf_em_tree_destroy(&t); kfree(fnew); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5060801a2f6d..cf9ad5b50889 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -28,6 +28,7 @@ struct fl_flow_key { struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; + struct flow_dissector_key_vlan vlan; struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; @@ -293,6 +294,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, + }; static void fl_set_key_val(struct nlattr **tb, @@ -308,9 +313,29 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static void fl_set_key_vlan(struct nlattr **tb, + struct flow_dissector_key_vlan *key_val, + struct flow_dissector_key_vlan *key_mask) +{ +#define VLAN_PRIORITY_MASK 0x7 + + if (tb[TCA_FLOWER_KEY_VLAN_ID]) { + key_val->vlan_id = + nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK; + key_mask->vlan_id = VLAN_VID_MASK; + } + if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) { + key_val->vlan_priority = + nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) & + VLAN_PRIORITY_MASK; + key_mask->vlan_priority = VLAN_PRIORITY_MASK; + } +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { + __be16 ethertype; #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FLOWER_INDEV]) { int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); @@ -328,9 +353,20 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, - &mask->basic.n_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); + if (tb[TCA_FLOWER_KEY_ETH_TYPE]) { + ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); + + if (ethertype == htons(ETH_P_8021Q)) { + fl_set_key_vlan(tb, &key->vlan, &mask->vlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + } else { + key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + } + } if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { @@ -404,12 +440,10 @@ static int fl_init_hashtable(struct cls_fl_head *head, #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) #define 
FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) -#define FL_KEY_MEMBER_END_OFFSET(member) \ - (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) -#define FL_KEY_IN_RANGE(mask, member) \ - (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ - FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) +#define FL_KEY_IS_MASKED(mask, member) \ + memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ + 0, FL_KEY_MEMBER_SIZE(member)) \ #define FL_KEY_SET(keys, cnt, id, member) \ do { \ @@ -418,9 +452,9 @@ static int fl_init_hashtable(struct cls_fl_head *head, cnt++; \ } while(0); -#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ +#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \ do { \ - if (FL_KEY_IN_RANGE(mask, member)) \ + if (FL_KEY_IS_MASKED(mask, member)) \ FL_KEY_SET(keys, cnt, id, member); \ } while(0); @@ -432,14 +466,16 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_VLAN, vlan); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -478,10 +514,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; int err; - tcf_exts_init(&e, TCA_FLOWER_ACT, 0); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -550,7 +588,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + if (err < 0) + goto errout; if (!handle) { handle = fl_grab_new_handle(tp, head); @@ -614,6 +654,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -668,6 +709,29 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_vlan(struct sk_buff *skb, + struct flow_dissector_key_vlan *vlan_key, + struct flow_dissector_key_vlan *vlan_mask) +{ + int err; + + if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask))) + return 0; + if (vlan_mask->vlan_id) { + err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID, + vlan_key->vlan_id); + if (err) + return err; + } + if (vlan_mask->vlan_priority) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO, + vlan_key->vlan_priority); + if (err) + return err; + } + return 0; +} + static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { @@ -712,6 +776,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, 
unsigned long fh, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto))) goto nla_put_failure; + + if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan)) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) && fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index f23a3b68bba6..cc0bda945800 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -195,10 +195,12 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, #endif /* CONFIG_NET_CLS_IND */ fnew->tp = f->tp; - tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) { + kfree(fnew); + return err; + } err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) + goto errout; f->id = handle; f->tp = tp; @@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 08a3b0a6f5ab..c91e65d81a48 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -383,17 +383,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est, int new, bool ovr) { - int err; u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { @@ -503,7 +505,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (!f) goto errout; - tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + if (err < 0) + goto errout; + if (fold) { f->id = fold->id; f->iif = fold->iif; @@ -557,6 +562,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..4f05a19fb073 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return 
err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout2; f = (struct rsvp_filter *)*arg; if (f) { @@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, goto errout2; } - tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) { + kfree(n); + goto errout2; + } if (tb[TCA_RSVP_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); @@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; - tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) + goto errout; h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -627,6 +635,7 @@ insert: goto insert; errout: + tcf_exts_destroy(&f->exts); kfree(f); errout2: tcf_exts_destroy(&e); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..d9500709831f 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; -static void tcindex_filter_result_init(struct tcindex_filter_result *r) +static int tcindex_filter_result_init(struct tcindex_filter_result *r) { memset(r, 0, sizeof(*r)); - tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } static void __tcindex_partial_destroy(struct rcu_head *head) @@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head) kfree(p); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp) +{ + int i; + + for (i = 0; i < cp->hash; i++) + tcf_exts_destroy(&cp->perfect[i].exts); + kfree(cp->perfect); +} + +static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +{ + int i, err = 0; + + cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), + GFP_KERNEL); + if (!cp->perfect) + return -ENOMEM; + + for (i = 0; i < cp->hash; i++) { + err = tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) + goto errout; + } + + return 0; + +errout: + tcindex_free_perfect_hash(cp); + return err; +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data *cp, *oldp; + struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + int err, balloc = 0; struct tcf_exts e; - tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -ENOMEM; /* tcindex_data attributes must look atomic to classifier/lookup so @@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - cp->perfect = kmemdup(p->perfect, - sizeof(*r) * cp->hash, GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) - 
tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + cp->perfect[i].res = p->perfect[i].res; balloc = 1; } cp->h = p->h; - tcindex_filter_result_init(&new_filter_result); - tcindex_filter_result_init(&cr); + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout1; + err = tcindex_filter_result_init(&cr); + if (err < 0) + goto errout1; if (old_r) cr.res = r->res; @@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - int i; - - cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout_alloc; - for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, - TCA_TCINDEX_POLICE); balloc = 1; } else { struct tcindex_filter __rcu **hash; @@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (!f) goto errout_alloc; f->key = handle; - tcindex_filter_result_init(&f->result); f->next = NULL; + err = tcindex_filter_result_init(&f->result); + if (err < 0) { + kfree(f); + goto errout_alloc; + } } if (tb[TCA_TCINDEX_CLASSID]) { @@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - if (old_r && old_r != r) - tcindex_filter_result_init(old_r); + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { + kfree(f); + goto errout_alloc; + } + } oldp = p; r->res = cr.res; @@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, errout_alloc: if (balloc == 1) - kfree(cp->perfect); + tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); +errout1: + tcf_exts_destroy(&cr.exts); + tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); tcf_exts_destroy(&e); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ffe593efe930..a29263a9d8c1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,13 +709,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_U32_LINK]) { @@ -833,7 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); - tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { + kfree(new); + return NULL; + } return new; } @@ -985,9 +990,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->handle = handle; n->fshift = s->hmask ? 
ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); n->tp = tp; + err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (err < 0) + goto errout; + #ifdef CONFIG_CLS_U32_MARK n->pcpu_success = alloc_percpu(u32); if (!n->pcpu_success) { @@ -1028,9 +1036,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, errhw: #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); -errout: #endif +errout: + tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF free_percpu(n->pf); #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12ebde845523..d677b3484d81 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,7 @@ #include <linux/hrtimer.h> #include <linux/lockdep.h> #include <linux/slab.h> +#include <linux/hashtable.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -259,37 +260,40 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; + if (!qdisc_dev(root)) + return (root->handle == handle ? root : NULL); + if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; - list_for_each_entry_rcu(q, &root->list, list) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { if (q->handle == handle) return q; } return NULL; } -void qdisc_list_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); - list_add_tail_rcu(&q->list, &root->list); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } } -EXPORT_SYMBOL(qdisc_list_add); +EXPORT_SYMBOL(qdisc_hash_add); -void qdisc_list_del(struct Qdisc *q) +void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); - list_del_rcu(&q->list); + hash_del_rcu(&q->hash); } } -EXPORT_SYMBOL(qdisc_list_del); +EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { @@ -998,7 +1002,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; } - qdisc_list_add(sch); + qdisc_hash_add(sch); return sch; } @@ -1431,10 +1435,11 @@ err_out: static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx) + int *q_idx_p, int s_q_idx, bool recur) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; + int b; if (!root) return 0; @@ -1449,7 +1454,17 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, goto done; q_idx++; } - list_for_each_entry(q, &root->list, list) { + + /* If dumping singletons, there is no qdisc_dev(root) and the singleton + * itself has already been dumped. 
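
The sch_api conversion above replaces the per-root linked list of qdiscs with a per-device hashtable keyed on the 32-bit handle, so lookup walks a single bucket instead of every qdisc. A toy model of that keyed lookup, with plain singly linked buckets standing in for the kernel's hlist/RCU machinery (names and hash function are illustrative):

#include <stdint.h>
#include <stdio.h>

#define HASH_BITS	4
#define HASH_SIZE	(1u << HASH_BITS)

struct qdisc_model {
	uint32_t handle;
	struct qdisc_model *next;	/* bucket chain */
};

static struct qdisc_model *buckets[HASH_SIZE];

static unsigned int bucket_of(uint32_t handle)
{
	return (handle * 2654435761u) >> (32 - HASH_BITS);	/* Knuth */
}

static void hash_add(struct qdisc_model *q)
{
	unsigned int b = bucket_of(q->handle);

	q->next = buckets[b];
	buckets[b] = q;
}

static struct qdisc_model *hash_lookup(uint32_t handle)
{
	struct qdisc_model *q;

	/* Only the matching bucket is walked, not every qdisc. */
	for (q = buckets[bucket_of(handle)]; q; q = q->next)
		if (q->handle == handle)
			return q;
	return NULL;
}

int main(void)
{
	struct qdisc_model a = { .handle = 0x8001 }, b = { .handle = 0x8002 };

	hash_add(&a);
	hash_add(&b);
	printf("found %p\n", (void *)hash_lookup(0x8002));
	return 0;
}
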
+ * + * If we've already dumped the top-level (ingress) qdisc above and the global + * qdisc hashtable, we don't want to hit it again + */ + if (!qdisc_dev(root) || !recur) + goto out; + + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1490,13 +1505,13 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx) < 0) + &q_idx, s_q_idx, false) < 0) goto done; cont: @@ -1765,6 +1780,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, int *t_p, int s_t) { struct Qdisc *q; + int b; if (!root) return 0; @@ -1772,7 +1788,10 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - list_for_each_entry(q, &root->list, list) { + if (!qdisc_dev(root)) + return 0; + + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 657c13362b19..0d21b567ff27 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = { .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), @@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); @@ -701,7 +699,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; #ifdef CONFIG_NET_SCHED - qdisc_list_del(qdisc); + qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif @@ -789,6 +787,10 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } +#ifdef CONFIG_NET_SCHED + if (dev->qdisc) + qdisc_hash_add(dev->qdisc); +#endif } static void transition_one_qdisc(struct net_device *dev, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ddc7bd74ecb..000f1d36128e 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -142,8 +142,6 @@ struct hfsc_class { link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ - u64 cl_myfadj; /* my fit-time adjustment (to cancel - history dependence) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the @@ -151,11 +149,8 @@ struct hfsc_class { (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ - u64 cl_vtoff; /* inter-period cumulative vt offset */ - u64 cl_cvtmax; /* max child's vt in the last period */ - u64 cl_cvtoff; /* cumulative cvtmax of all periods */ - u64 cl_pcvtoff; /* parent's cvtoff at initialization - time */ + u64 cl_cvtoff; /* largest virtual time seen among + the children */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ @@ -701,28 +696,16 @@ 
init_vf(struct hfsc_class *cl, unsigned int len) } else { /* * first child for a new parent backlog period. - * add parent's cvtmax to cvtoff to make a new - * vt (vtoff + vt) larger than the vt in the - * last period for all children. + * initialize cl_vt to the highest value seen + * among the siblings. this is analogous to + * what cur_time would provide in realtime case. */ - vt = cl->cl_parent->cl_cvtmax; - cl->cl_parent->cl_cvtoff += vt; - cl->cl_parent->cl_cvtmax = 0; + cl->cl_vt = cl->cl_parent->cl_cvtoff; cl->cl_parent->cl_cvtmin = 0; - cl->cl_vt = 0; } - cl->cl_vtoff = cl->cl_parent->cl_cvtoff - - cl->cl_pcvtoff; - /* update the virtual curve */ - vt = cl->cl_vt + cl->cl_vtoff; - rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, - cl->cl_total); - if (cl->cl_virtual.x == vt) { - cl->cl_virtual.x -= cl->cl_vtoff; - cl->cl_vtoff = 0; - } + rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ @@ -745,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len) /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); - cl->cl_myfadj = 0; } } @@ -779,8 +761,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) go_passive = 0; /* update vt */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, @@ -795,9 +776,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) if (go_passive) { /* no more active child, going passive */ - /* update cvtmax of the parent class */ - if (cl->cl_vt > cl->cl_parent->cl_cvtmax) - cl->cl_parent->cl_cvtmax = cl->cl_vt; + /* update cvtoff of the parent class */ + if (cl->cl_vt > cl->cl_parent->cl_cvtoff) + cl->cl_parent->cl_cvtoff = cl->cl_vt; /* remove this class from the vt tree */ vttree_remove(cl); @@ -813,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) /* update f */ if (cl->cl_flags & HFSC_USC) { + cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); +#if 0 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); -#if 0 /* * This code causes classes to stay way under their * limit when multiple classes are used at gigabit @@ -940,7 +922,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1094,7 +1076,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent->level == 0) hfsc_purge_queue(sch, parent); hfsc_adjust_levels(parent); - cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1482,16 +1463,12 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_e = 0; cl->cl_vt = 0; cl->cl_vtadj = 0; - cl->cl_vtoff = 0; cl->cl_cvtmin = 0; - cl->cl_cvtmax = 0; cl->cl_cvtoff = 0; - cl->cl_pcvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; cl->cl_myf = 0; - cl->cl_myfadj = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b9439827c172..2bc8d7f8df16 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); 
#endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 549c66359924..b5c502c78143 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig new file mode 100644 index 000000000000..6cff3f6d0c3a --- /dev/null +++ b/net/strparser/Kconfig @@ -0,0 +1,4 @@ + +config STREAM_PARSER + tristate + default n diff --git a/net/strparser/Makefile b/net/strparser/Makefile new file mode 100644 index 000000000000..858a126ebaa0 --- /dev/null +++ b/net/strparser/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_STREAM_PARSER) += strparser.o diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c new file mode 100644 index 000000000000..5c7549b5b92c --- /dev/null +++ b/net/strparser/strparser.c @@ -0,0 +1,510 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/errno.h> +#include <linux/errqueue.h> +#include <linux/file.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/poll.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/uaccess.h> +#include <linux/workqueue.h> +#include <net/strparser.h> +#include <net/netns/generic.h> +#include <net/sock.h> + +static struct workqueue_struct *strp_wq; + +struct _strp_rx_msg { + /* Internal cb structure. struct strp_rx_msg must be first for passing + * to upper layer. 
+ */
+	struct strp_rx_msg strp;
+	int accum_len;
+	int early_eaten;
+};
+
+static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+{
+	return (struct _strp_rx_msg *)((void *)skb->cb +
+		offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Lower lock held */
+static void strp_abort_rx_strp(struct strparser *strp, int err)
+{
+	struct sock *csk = strp->sk;
+
+	/* Unrecoverable error in receive */
+
+	del_timer(&strp->rx_msg_timer);
+
+	if (strp->rx_stopped)
+		return;
+
+	strp->rx_stopped = 1;
+
+	/* Report an error on the lower socket */
+	csk->sk_err = err;
+	csk->sk_error_report(csk);
+}
+
+static void strp_start_rx_timer(struct strparser *strp)
+{
+	if (strp->sk->sk_rcvtimeo)
+		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+}
+
+/* Lower lock held */
+static void strp_parser_err(struct strparser *strp, int err,
+			    read_descriptor_t *desc)
+{
+	desc->error = err;
+	kfree_skb(strp->rx_skb_head);
+	strp->rx_skb_head = NULL;
+	strp->cb.abort_parser(strp, err);
+}
+
+static inline int strp_peek_len(struct strparser *strp)
+{
+	struct socket *sock = strp->sk->sk_socket;
+
+	return sock->ops->peek_len(sock);
+}
+
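An aside on the callback contract that strp_recv() below enforces: parse_msg returns 0 when it cannot yet determine the message length, a negative errno (for example -ESTRPIPE) to abort parsing, and otherwise the total length of the message that starts at the recorded offset. A minimal sketch of such a callback, assuming a hypothetical protocol that prefixes every message with a 4-byte big-endian length covering header plus payload; the example_* names are invented here, and strp_rx_msg() is the cb-area accessor expected from the companion include/net/strparser.h header, which is outside this net/-only diff:

static int example_parse_msg(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_rx_msg *rxm = strp_rx_msg(skb);
	__be32 len_field;
	int err;

	/* Not even the length header is available yet; return 0 so
	 * strparser accumulates more data and calls us again.
	 */
	if (skb->len < rxm->offset + sizeof(len_field))
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &len_field,
			    sizeof(len_field));
	if (err < 0)
		return err;	/* a negative return aborts the parser */

	/* Positive return: total message length, header included */
	return be32_to_cpu(len_field);
}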
+/* Lower socket lock held */
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		     unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+	struct _strp_rx_msg *rxm;
+	struct sk_buff *head, *skb;
+	size_t eaten = 0, cand_len;
+	ssize_t extra;
+	int err;
+	bool cloned_orig = false;
+
+	if (strp->rx_paused)
+		return 0;
+
+	head = strp->rx_skb_head;
+	if (head) {
+		/* Message already in progress */
+
+		rxm = _strp_rx_msg(head);
+		if (unlikely(rxm->early_eaten)) {
+			/* Some bytes of the receive sock's data were already
+			 * saved in rx_skb_head; just indicate that they are
+			 * consumed.
+			 */
+			eaten = orig_len <= rxm->early_eaten ?
+				orig_len : rxm->early_eaten;
+			rxm->early_eaten -= eaten;
+
+			return eaten;
+		}
+
+		if (unlikely(orig_offset)) {
+			/* Getting data with a non-zero offset when a message is
+			 * in progress is not expected. If it does happen, we
+			 * need to clone and pull since we can't deal with
+			 * offsets in the skbs for a message except in the head.
+			 */
+			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+			if (!orig_skb) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			if (!pskb_pull(orig_skb, orig_offset)) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				kfree_skb(orig_skb);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			cloned_orig = true;
+			orig_offset = 0;
+		}
+
+		if (!strp->rx_skb_nextp) {
+			/* We are going to append to the frags_list of head.
+			 * Need to unshare the frag_list.
+			 */
+			err = skb_unclone(head, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				return 0;
+			}
+
+			if (unlikely(skb_shinfo(head)->frag_list)) {
+				/* We can't append to an sk_buff that already
+				 * has a frag_list. We create a new head, point
+				 * the frag_list of that to the old head, and
+				 * then are able to use the old head->next for
+				 * appending to the message.
+				 */
+				if (WARN_ON(head->next)) {
+					desc->error = -EINVAL;
+					return 0;
+				}
+
+				skb = alloc_skb(0, GFP_ATOMIC);
+				if (!skb) {
+					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					desc->error = -ENOMEM;
+					return 0;
+				}
+				skb->len = head->len;
+				skb->data_len = head->len;
+				skb->truesize = head->truesize;
+				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
+				strp->rx_skb_nextp = &head->next;
+				skb_shinfo(skb)->frag_list = head;
+				strp->rx_skb_head = skb;
+				head = skb;
+			} else {
+				strp->rx_skb_nextp =
+					&skb_shinfo(head)->frag_list;
+			}
+		}
+	}
+
+	while (eaten < orig_len) {
+		/* Always clone since we will consume something */
+		skb = skb_clone(orig_skb, GFP_ATOMIC);
+		if (!skb) {
+			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			desc->error = -ENOMEM;
+			break;
+		}
+
+		cand_len = orig_len - eaten;
+
+		head = strp->rx_skb_head;
+		if (!head) {
+			head = skb;
+			strp->rx_skb_head = head;
+			/* Will set rx_skb_nextp on next packet if needed */
+			strp->rx_skb_nextp = NULL;
+			rxm = _strp_rx_msg(head);
+			memset(rxm, 0, sizeof(*rxm));
+			rxm->strp.offset = orig_offset + eaten;
+		} else {
+			/* Unclone since we may be appending to an skb that we
+			 * already share a frag_list with.
+			 */
+			err = skb_unclone(skb, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				break;
+			}
+
+			rxm = _strp_rx_msg(head);
+			*strp->rx_skb_nextp = skb;
+			strp->rx_skb_nextp = &skb->next;
+			head->data_len += skb->len;
+			head->len += skb->len;
+			head->truesize += skb->truesize;
+		}
+
+		if (!rxm->strp.full_len) {
+			ssize_t len;
+
+			len = (*strp->cb.parse_msg)(strp, head);
+
+			if (!len) {
+				/* Need more header to determine length */
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+				rxm->accum_len += cand_len;
+				eaten += cand_len;
+				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				WARN_ON(eaten != orig_len);
+				break;
+			} else if (len < 0) {
+				if (len == -ESTRPIPE && rxm->accum_len) {
+					len = -ENODATA;
+					strp->rx_unrecov_intr = 1;
+				} else {
+					strp->rx_interrupted = 1;
+				}
+				strp_parser_err(strp, len, desc);
+				break;
+			} else if (len > strp->sk->sk_rcvbuf) {
+				/* Message length exceeds maximum allowed */
+				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				strp_parser_err(strp, -EMSGSIZE, desc);
+				break;
+			} else if (len <= (ssize_t)head->len -
+					  skb->len - rxm->strp.offset) {
+				/* Length must be into new skb (and also
+				 * greater than zero)
+				 */
+				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				strp_parser_err(strp, -EPROTO, desc);
+				break;
+			}
+
+			rxm->strp.full_len = len;
+		}
+
+		extra = (ssize_t)(rxm->accum_len + cand_len) -
+			rxm->strp.full_len;
+
+		if (extra < 0) {
+			/* Message not complete yet. */
+			if (rxm->strp.full_len - rxm->accum_len >
+			    strp_peek_len(strp)) {
+				/* Don't have the whole message in the socket
+				 * buffer. Set strp->rx_need_bytes to wait for
+				 * the rest of the message. Also, set "early
+				 * eaten" since we've already buffered the skb
+				 * but don't consume yet per strp_read_sock.
+				 */
+
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+
+				strp->rx_need_bytes = rxm->strp.full_len -
+						      rxm->accum_len;
+				rxm->accum_len += cand_len;
+				rxm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				desc->count = 0; /* Stop reading socket */
+				break;
+			}
+			rxm->accum_len += cand_len;
+			eaten += cand_len;
+			WARN_ON(eaten != orig_len);
+			break;
+		}
+
+		/* Positive extra indicates more bytes than needed for the
+		 * message
+		 */
+
+		WARN_ON(extra > cand_len);
+
+		eaten += (cand_len - extra);
+
+		/* Hurray, we have a new message! */
+		del_timer(&strp->rx_msg_timer);
+		strp->rx_skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.rx_msgs);
+
+		/* Give skb to upper layer */
+		strp->cb.rcv_msg(strp, head);
+
+		if (unlikely(strp->rx_paused)) {
+			/* Upper layer paused strp */
+			break;
+		}
+	}
+
+	if (cloned_orig)
+		kfree_skb(orig_skb);
+
+	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+
+	return eaten;
+}
+
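When strp_recv() completes a message, the head skb handed to rcv_msg carries a strp_rx_msg in its cb area whose offset and full_len locate the message inside the (possibly frag_list-chained) skb, and the callee takes ownership of the skb. A sketch of a consumer under the same hypothetical example_* naming as above, again not part of the patch:

static void example_rcv_msg(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_rx_msg *rxm = strp_rx_msg(skb);

	/* The message starts rxm->offset bytes into the skb and is
	 * rxm->full_len bytes long.
	 */
	pr_debug("got message: offset %d len %d\n",
		 rxm->offset, rxm->full_len);

	/* Queue for a reader or process in place, then release */
	kfree_skb(skb);
}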
+static int default_read_sock_done(struct strparser *strp, int err)
+{
+	return err;
+}
+
+/* Called with lock held on lower socket */
+static int strp_read_sock(struct strparser *strp)
+{
+	struct socket *sock = strp->sk->sk_socket;
+	read_descriptor_t desc;
+
+	desc.arg.data = strp;
+	desc.error = 0;
+	desc.count = 1; /* give more than one skb per call */
+
+	/* sk should be locked here, so okay to do read_sock */
+	sock->ops->read_sock(strp->sk, &desc, strp_recv);
+
+	desc.error = strp->cb.read_sock_done(strp, desc.error);
+
+	return desc.error;
+}
+
+/* Lower sock lock held */
+void strp_data_ready(struct strparser *strp)
+{
+	if (unlikely(strp->rx_stopped))
+		return;
+
+	/* This check is needed to synchronize with do_strp_rx_work.
+	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	 * the lock held here is bh_lock_sock. The two locks can be
+	 * held by different threads at the same time, but bh_lock_sock
+	 * allows a thread in BH context to safely check if the process
+	 * lock is held. In this case, if the lock is held, queue work.
+	 */
+	if (sock_owned_by_user(strp->sk)) {
+		queue_work(strp_wq, &strp->rx_work);
+		return;
+	}
+
+	if (strp->rx_paused)
+		return;
+
+	if (strp->rx_need_bytes) {
+		if (strp_peek_len(strp) >= strp->rx_need_bytes)
+			strp->rx_need_bytes = 0;
+		else
+			return;
+	}
+
+	if (strp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_data_ready);
+
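strp_data_ready() is meant to be called from the lower socket's sk_data_ready callback, typically in BH context with bh_lock_sock held, while the workqueue covers the cases the softirq path cannot handle. A sketch of how an upper layer might attach a parser to a connected TCP socket, loosely following the pattern KCM uses (hypothetical example_* names building on the callbacks sketched above; detach and error paths elided):

struct example_psock {
	struct strparser strp;
	void (*saved_data_ready)(struct sock *sk);
};

static void example_data_ready(struct sock *sk)
{
	struct example_psock *psock;

	read_lock_bh(&sk->sk_callback_lock);
	psock = (struct example_psock *)sk->sk_user_data;
	if (likely(psock))
		strp_data_ready(&psock->strp);
	read_unlock_bh(&sk->sk_callback_lock);
}

static int example_attach(struct example_psock *psock, struct sock *csk)
{
	struct strp_callbacks cb = {
		.rcv_msg	= example_rcv_msg,
		.parse_msg	= example_parse_msg,
		/* read_sock_done and abort_parser default in strp_init() */
	};
	int err;

	err = strp_init(&psock->strp, csk, &cb);
	if (err)
		return err;

	write_lock_bh(&csk->sk_callback_lock);
	psock->saved_data_ready = csk->sk_data_ready;
	csk->sk_user_data = psock;
	csk->sk_data_ready = example_data_ready;
	write_unlock_bh(&csk->sk_callback_lock);

	/* Pick up anything that arrived before the hook was installed */
	strp_check_rcv(&psock->strp);
	return 0;
}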
+static void do_strp_rx_work(struct strparser *strp)
+{
+	read_descriptor_t rd_desc;
+	struct sock *csk = strp->sk;
+
+	/* We need the read lock to synchronize with strp_data_ready. We
+	 * need the socket lock for calling strp_read_sock.
+	 */
+	lock_sock(csk);
+
+	if (unlikely(strp->rx_stopped))
+		goto out;
+
+	if (strp->rx_paused)
+		goto out;
+
+	rd_desc.arg.data = strp;
+
+	if (strp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+
+out:
+	release_sock(csk);
+}
+
+static void strp_rx_work(struct work_struct *w)
+{
+	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+}
+
+static void strp_rx_msg_timeout(unsigned long arg)
+{
+	struct strparser *strp = (struct strparser *)arg;
+
+	/* Message assembly timed out */
+	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
+	lock_sock(strp->sk);
+	strp->cb.abort_parser(strp, ETIMEDOUT);
+	release_sock(strp->sk);
+}
+
+int strp_init(struct strparser *strp, struct sock *csk,
+	      struct strp_callbacks *cb)
+{
+	struct socket *sock = csk->sk_socket;
+
+	if (!cb || !cb->rcv_msg || !cb->parse_msg)
+		return -EINVAL;
+
+	if (!sock->ops->read_sock || !sock->ops->peek_len)
+		return -EAFNOSUPPORT;
+
+	memset(strp, 0, sizeof(*strp));
+
+	strp->sk = csk;
+
+	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
+		    (unsigned long)strp);
+
+	INIT_WORK(&strp->rx_work, strp_rx_work);
+
+	strp->cb.rcv_msg = cb->rcv_msg;
+	strp->cb.parse_msg = cb->parse_msg;
+	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(strp_init);
+
+void strp_unpause(struct strparser *strp)
+{
+	strp->rx_paused = 0;
+
+	/* Sync setting rx_paused with RX work */
+	smp_mb();
+
+	queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_unpause);
+
+/* strp must already be stopped so that strp_recv will no longer be called.
+ * Note that strp_done is not called with the lower socket held.
+ */
+void strp_done(struct strparser *strp)
+{
+	WARN_ON(!strp->rx_stopped);
+
+	del_timer_sync(&strp->rx_msg_timer);
+	cancel_work_sync(&strp->rx_work);
+
+	if (strp->rx_skb_head) {
+		kfree_skb(strp->rx_skb_head);
+		strp->rx_skb_head = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(strp_done);
+
+void strp_stop(struct strparser *strp)
+{
+	strp->rx_stopped = 1;
+}
+EXPORT_SYMBOL_GPL(strp_stop);
+
+void strp_check_rcv(struct strparser *strp)
+{
+	queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_check_rcv);
+
+static int __init strp_mod_init(void)
+{
+	strp_wq = create_singlethread_workqueue("kstrp");
+	if (unlikely(!strp_wq))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit strp_mod_exit(void)
+{
+}
+module_init(strp_mod_init);
+module_exit(strp_mod_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5fc9dd24aa9..1031a0327fff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1292,12 +1292,10 @@ bool switchdev_port_same_parent_id(struct net_device *a, struct switchdev_attr a_attr = { .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; if (switchdev_port_attr_get(a, &a_attr) || @@ -1306,89 +1304,4 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return
switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 46a71c701e7c..5bc1a3d57401 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); - kuid_t root_uid = make_kuid(net->user_ns, 0); - kgid_t root_gid = make_kgid(net->user_ns, 0); /* Allow network administrator to have same access as root. 
*/ - if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_euid())) { + if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } - /* Allow netns root group to have the same access as the root group */ - if (in_egroup_p(root_gid)) { - int mode = (table->mode >> 3) & 7; - return (mode << 3) | mode; - } + return table->mode; } +static void net_ctl_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct net *net = container_of(head->set, struct net, sysctls); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(net->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(net->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, + .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b1bbf133bd..975dbeb60ab0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -42,6 +42,7 @@ #include "monitor.h" #include "bcast.h" #include "netlink.h" +#include "udp_media.h" #define MAX_ADDR_STR 60 @@ -56,6 +57,13 @@ static struct tipc_media * const media_info_array[] = { NULL }; +static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + + return rcu_dereference_rtnl(tn->bearer_list[bearer_id]); +} + static void bearer_disable(struct net *net, struct tipc_bearer *b); /** @@ -323,6 +331,7 @@ restart: b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = priority; + test_and_set_bit_lock(0, &b->up); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { @@ -360,15 +369,24 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ void tipc_bearer_reset_all(struct net *net) { - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; int i; for (i = 0; i < MAX_BEARERS; i++) { - b = rcu_dereference_rtnl(tn->bearer_list[i]); + b = bearer_get(net, i); + if (b) + clear_bit_unlock(0, &b->up); + } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); if (b) tipc_reset_bearer(net, b); } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); + if (b) + test_and_set_bit_lock(0, &b->up); + } } /** @@ -382,8 +400,9 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); - b->media->disable_media(b); + clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); + b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); @@ -440,22 +459,16 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; - void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; - /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) - goto drop; - - delta = dev->hard_header_len - skb_headroom(skb); - if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - goto drop; - + delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { + 
kfree_skb(skb); + return 0; + } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); @@ -463,9 +476,6 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; -drop: - kfree_skb(skb); - return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -487,12 +497,12 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest) { - struct tipc_net *tn = tipc_net(net); + struct tipc_msg *hdr = buf_msg(skb); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) + b = bearer_get(net, bearer_id); + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) b->media->send_msg(net, skb, b, dest); else kfree_skb(skb); @@ -505,7 +515,6 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -513,12 +522,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, return; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (unlikely(!b)) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) + b->media->send_msg(net, skb, b, dst); + else + kfree_skb(skb); } rcu_read_unlock(); } @@ -535,8 +547,8 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct tipc_msg *hdr; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (unlikely(!b)) + b = bearer_get(net, bearer_id); + if (unlikely(!b || !test_bit(0, &b->up))) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { hdr = buf_msg(skb); @@ -566,7 +578,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + if (likely(b && test_bit(0, &b->up) && + (skb->pkt_type <= PACKET_BROADCAST))) { skb->next = NULL; tipc_rcv(dev_net(dev), skb, b); rcu_read_unlock(); @@ -591,18 +604,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; - int i; b = rtnl_dereference(dev->tipc_ptr); - if (!b) { - for (i = 0; i < MAX_BEARERS; b = NULL, i++) { - b = rtnl_dereference(tn->bearer_list[i]); - if (b && (b->media_ptr == dev)) - break; - } - } if (!b) return NOTIFY_DONE; @@ -613,11 +617,10 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (netif_carrier_ok(dev)) break; case NETDEV_UP: - rcu_assign_pointer(dev->tipc_ptr, b); + test_and_set_bit_lock(0, &b->up); break; case NETDEV_GOING_DOWN: - RCU_INIT_POINTER(dev->tipc_ptr, NULL); - synchronize_net(); + clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_CHANGEMTU: @@ -709,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; nla_nest_end(msg->skb, prop); + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { + if (tipc_udp_nl_add_bearer_data(msg, bearer)) + goto attr_msg_full; + } +#endif + nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -895,6 +906,49 @@ int 
tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) return 0; } +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + if (err) { + rtnl_unlock(); + return err; + } + } +#endif + rtnl_unlock(); + + return 0; +} + int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 43757f1f9cb3..78892e2f53e3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -150,6 +150,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + unsigned long up; }; struct tipc_bearer_names { @@ -180,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/link.c b/net/tipc/link.c index 877d94f34814..2c6e1b9e024b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -807,7 +807,7 @@ void link_prepare_wakeup(struct tipc_link *l) skb_queue_walk_safe(&l->wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - lim = l->window + l->backlog[imp].limit; + lim = l->backlog[imp].limit; pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; if ((pnd[imp] + l->backlog[imp].len) >= lim) break; @@ -873,9 +873,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff *skb, *_skb, *bskb; /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) - return link_schedule_user(l, list); + if (!skb_queue_empty(backlogq)) { + for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { + if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) + return link_schedule_user(l, list); + } } if (unlikely(msg_size(hdr) > mtu)) { skb_queue_purge(list); @@ -1692,10 +1694,10 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); l->window = win; - l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; - l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; - l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; - l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); + l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = 
max_bulk; } diff --git a/net/tipc/net.h b/net/tipc/net.h index 77a7a118911d..c7c254902873 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,6 +39,8 @@ #include <net/genetlink.h> +extern const struct nla_policy tipc_nl_net_policy[]; + int tipc_net_start(struct net *net, u32 addr); void tipc_net_stop(struct net *net); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index a84daec0afe9..3200059d14b2 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -41,6 +41,7 @@ #include "link.h" #include "node.h" #include "net.h" +#include "udp_media.h" #include <net/genetlink.h> static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { @@ -161,6 +162,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .policy = tipc_nl_policy, }, { + .cmd = TIPC_NL_BEARER_ADD, + .doit = tipc_nl_bearer_add, + .policy = tipc_nl_policy, + }, + { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, .policy = tipc_nl_policy, @@ -238,6 +244,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_nl_node_dump_monitor_peer, .policy = tipc_nl_policy, }, + { + .cmd = TIPC_NL_PEER_REMOVE, + .doit = tipc_nl_peer_rm, + .policy = tipc_nl_policy, + }, +#ifdef CONFIG_TIPC_MEDIA_UDP + { + .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .dumpit = tipc_udp_nl_dump_remoteip, + .policy = tipc_nl_policy, + }, +#endif }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/node.c b/net/tipc/node.c index 21974191e425..7e8b75fd1a02 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1553,6 +1553,69 @@ discard: kfree_skb(skb); } +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct tipc_node *peer; + u32 addr; + int err; + int i; + + /* We identify the peer by its net */ + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_NET_ADDR]) + return -EINVAL; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + + if (in_own_node(net, addr)) + return -ENOTSUPP; + + spin_lock_bh(&tn->node_list_lock); + peer = tipc_node_find(net, addr); + if (!peer) { + spin_unlock_bh(&tn->node_list_lock); + return -ENXIO; + } + + tipc_node_write_lock(peer); + if (peer->state != SELF_DOWN_PEER_DOWN && + peer->state != SELF_DOWN_PEER_LEAVING) { + tipc_node_write_unlock(peer); + err = -EBUSY; + goto err_out; + } + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link_entry *le = &peer->links[i]; + + if (le->link) { + kfree(le->link); + le->link = NULL; + peer->link_cnt--; + } + } + tipc_node_write_unlock(peer); + tipc_node_delete(peer); + + err = 0; +err_out: + tipc_node_put(peer); + spin_unlock_bh(&tn->node_list_lock); + + return err; +} + int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; diff --git a/net/tipc/node.h b/net/tipc/node.h index d69fdfcc0ec9..4578b34c7dca 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -77,6 +77,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_monitor(struct sk_buff *skb, 
struct genl_info *info); int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ae7e14cae085..dd274687a53d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -49,6 +49,7 @@ #include "core.h" #include "bearer.h" #include "netlink.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -70,6 +71,13 @@ struct udp_media_addr { }; }; +/* struct udp_replicast - container for UDP remote addresses */ +struct udp_replicast { + struct udp_media_addr addr; + struct rcu_head rcu; + struct list_head list; +}; + /** * struct udp_bearer - ip/udp bearer data structure * @bearer: associated generic tipc bearer @@ -82,8 +90,20 @@ struct udp_bearer { struct socket *ubsock; u32 ifindex; struct work_struct work; + struct udp_replicast rcast; }; +static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) +{ + if (ntohs(addr->proto) == ETH_P_IP) + return ipv4_is_multicast(addr->ipv4.s_addr); +#if IS_ENABLED(CONFIG_IPV6) + else + return ipv6_addr_is_multicast(&addr->ipv6); +#endif + return 0; +} + /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, struct udp_media_addr *ua) @@ -91,15 +111,9 @@ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, memset(addr, 0, sizeof(struct tipc_media_addr)); addr->media_id = TIPC_MEDIA_TYPE_UDP; memcpy(addr->value, ua, sizeof(struct udp_media_addr)); - if (ntohs(ua->proto) == ETH_P_IP) { - if (ipv4_is_multicast(ua->ipv4.s_addr)) - addr->broadcast = 1; - } else if (ntohs(ua->proto) == ETH_P_IPV6) { - if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) - addr->broadcast = 1; - } else { - pr_err("Invalid UDP media address\n"); - } + + if (tipc_udp_is_mcast_addr(ua)) + addr->broadcast = 1; } /* tipc_udp_addr2str - convert ip/udp address to string */ @@ -140,28 +154,13 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) } /* tipc_send_msg - enqueue a send request */ -static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, - struct tipc_bearer *b, - struct tipc_media_addr *dest) +static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst) { int ttl, err = 0; - struct udp_bearer *ub; - struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; - struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) { - err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); - if (err) - goto tx_error; - } - - skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); - ub = rcu_dereference_rtnl(b->media_ptr); - if (!ub) { - err = -ENODEV; - goto tx_error; - } if (dst->proto == htons(ETH_P_IP)) { struct flowi4 fl = { .daddr = dst->ipv4.s_addr, @@ -207,29 +206,178 @@ tx_error: return err; } +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *addr) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_replicast *rcast; + struct udp_bearer *ub; + int err = 0; + + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto out; + } + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = 
-ENODEV; + goto out; + } + + if (!addr->broadcast || list_empty(&ub->rcast.list)) + return tipc_udp_xmit(net, skb, ub, src, dst); + + /* Replicast, send an skb to each configured IP address */ + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { + struct sk_buff *_skb; + + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + err = -ENOMEM; + goto out; + } + + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); + if (err) { + kfree_skb(_skb); + goto out; + } + } + err = 0; +out: + kfree_skb(skb); + return err; +} + +static bool tipc_udp_is_known_peer(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast, *tmp; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err_ratelimited("UDP bearer instance not found\n"); + return false; + } + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) + return true; + } + + return false; +} + +static int tipc_udp_rcast_add(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); + if (!rcast) + return -ENOMEM; + + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); + + if (ntohs(addr->proto) == ETH_P_IP) + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(addr->proto) == ETH_P_IPV6) + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); +#endif + + list_add_rcu(&rcast->list, &ub->rcast.list); + return 0; +} + +static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) +{ + struct udp_media_addr src = {0}; + struct udp_media_addr *dst; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) + return 0; + + src.port = udp_hdr(skb)->source; + + if (ip_hdr(skb)->version == 4) { + struct iphdr *iphdr = ip_hdr(skb); + + src.proto = htons(ETH_P_IP); + src.ipv4.s_addr = iphdr->saddr; + if (ipv4_is_multicast(iphdr->daddr)) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_hdr(skb)->version == 6) { + struct ipv6hdr *iphdr = ipv6_hdr(skb); + + src.proto = htons(ETH_P_IPV6); + src.ipv6 = iphdr->saddr; + if (ipv6_addr_is_multicast(&iphdr->daddr)) + return 0; +#endif + } else { + return 0; + } + + if (likely(tipc_udp_is_known_peer(b, &src))) + return 0; + + return tipc_udp_rcast_add(b, &src); +} + /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + struct tipc_msg *hdr; + int err; ub = rcu_dereference_sk_user_data(sk); if (!ub) { pr_err_ratelimited("Failed to get UDP bearer reference"); - kfree_skb(skb); - return 0; + goto out; } - skb_pull(skb, sizeof(struct udphdr)); + hdr = buf_msg(skb); + rcu_read_lock(); b = rcu_dereference_rtnl(ub->bearer); + if (!b) + goto rcu_out; - if (b) { + if (b && test_bit(0, &b->up)) { tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); return 0; } + + if (unlikely(msg_user(hdr) == LINK_CONFIG)) { + err = tipc_udp_rcast_disc(b, skb); + if (err) + goto rcu_out; + } + + tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); + return 0; + +rcu_out: + rcu_read_unlock(); +out: kfree_skb(skb); return 0; } @@ -241,15 +389,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) struct sock *sk = ub->ubsock->sk; if (ntohs(remote->proto) == ETH_P_IP) { - if 
(!ipv4_is_multicast(remote->ipv4.s_addr)) - return 0; mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { - if (!ipv6_addr_is_multicast(&remote->ipv6)) - return 0; err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); #endif @@ -257,75 +401,234 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) return err; } -/** - * parse_options - build local/remote addresses from configuration - * @attrs: netlink config data - * @ub: UDP bearer instance - * @local: local bearer IP address/port - * @remote: peer or multicast IP/port - */ -static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, - struct udp_media_addr *local, - struct udp_media_addr *remote) +static int __tipc_nl_add_udp_addr(struct sk_buff *skb, + struct udp_media_addr *addr, int nla_t) { - struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; - struct sockaddr_storage sa_local, sa_remote; + if (ntohs(addr->proto) == ETH_P_IP) { + struct sockaddr_in ip4; - if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) - goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy)) - goto err; - if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { - nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL], - sizeof(sa_local)); - nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE], - sizeof(sa_remote)); + ip4.sin_family = AF_INET; + ip4.sin_port = addr->port; + ip4.sin_addr.s_addr = addr->ipv4.s_addr; + if (nla_put(skb, nla_t, sizeof(ip4), &ip4)) + return -EMSGSIZE; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (ntohs(addr->proto) == ETH_P_IPV6) { + struct sockaddr_in6 ip6; + + ip6.sin6_family = AF_INET6; + ip6.sin6_port = addr->port; + memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); + if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) + return -EMSGSIZE; +#endif + } + + return 0; +} + +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) +{ + u32 bid = cb->args[0]; + u32 skip_cnt = cb->args[1]; + u32 portid = NETLINK_CB(cb->skb).portid; + struct udp_replicast *rcast, *tmp; + struct tipc_bearer *b; + struct udp_bearer *ub; + void *hdr; + int err; + int i; + + if (!bid && !skip_cnt) { + struct net *net = sock_net(skb->sk); + struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr **attrs; + char *bname; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!battrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, bname); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + bid = b->identity; } else { -err: - pr_err("Invalid UDP bearer configuration"); + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + rtnl_lock(); + b = rtnl_dereference(tn->bearer_list[bid]); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + } + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + rtnl_unlock(); return -EINVAL; } - if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) { - struct sockaddr_in *ip4; - - ip4 = (struct sockaddr_in *)&sa_local; - local->proto = htons(ETH_P_IP); - local->port = ip4->sin_port; - local->ipv4.s_addr = ip4->sin_addr.s_addr; - - ip4 = (struct sockaddr_in *)&sa_remote; - remote->proto = 
htons(ETH_P_IP); - remote->port = ip4->sin_port; - remote->ipv4.s_addr = ip4->sin_addr.s_addr; + + i = 0; + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (i < skip_cnt) + goto count; + + hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_BEARER_GET); + if (!hdr) + goto done; + + err = __tipc_nl_add_udp_addr(skb, &rcast->addr, + TIPC_NLA_UDP_REMOTE); + if (err) { + genlmsg_cancel(skb, hdr); + goto done; + } + genlmsg_end(skb, hdr); +count: + i++; + } +done: + rtnl_unlock(); + cb->args[0] = bid; + cb->args[1] = i; + + return skb->len; +} + +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst; + struct udp_bearer *ub; + struct nlattr *nest; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + if (!nest) + goto msg_full; + + if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) + goto msg_full; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) + goto msg_full; + + if (!list_empty(&ub->rcast.list)) { + if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) + goto msg_full; + } + + nla_nest_end(msg->skb, nest); + return 0; +msg_full: + nla_nest_cancel(msg->skb, nest); + return -EMSGSIZE; +} + +/** + * tipc_parse_udp_addr - build udp media address from netlink data + * @nlattr: netlink attribute containing sockaddr storage aligned address + * @addr: tipc media address to fill with address, port and protocol type + * @scope_id: IPv6 scope id pointer, not NULL indicates it's required + */ + +static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, + u32 *scope_id) +{ + struct sockaddr_storage sa; + + nla_memcpy(&sa, nla, sizeof(sa)); + if (sa.ss_family == AF_INET) { + struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; + + addr->proto = htons(ETH_P_IP); + addr->port = ip4->sin_port; + addr->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; #if IS_ENABLED(CONFIG_IPV6) - } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) { - int atype; - struct sockaddr_in6 *ip6; + } else if (sa.ss_family == AF_INET6) { + struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; - ip6 = (struct sockaddr_in6 *)&sa_local; - atype = ipv6_addr_type(&ip6->sin6_addr); - if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id) - return -EINVAL; + addr->proto = htons(ETH_P_IPV6); + addr->port = ip6->sin6_port; + memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); + + /* Scope ID is only interesting for local addresses */ + if (scope_id) { + int atype; - local->proto = htons(ETH_P_IPV6); - local->port = ip6->sin6_port; - memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); - ub->ifindex = ip6->sin6_scope_id; + atype = ipv6_addr_type(&ip6->sin6_addr); + if (__ipv6_addr_needs_scope_id(atype) && + !ip6->sin6_scope_id) { + return -EINVAL; + } + + *scope_id = ip6->sin6_scope_id ? 
: 0; + } - ip6 = (struct sockaddr_in6 *)&sa_remote; - remote->proto = htons(ETH_P_IPV6); - remote->port = ip6->sin6_port; - memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); return 0; #endif } return -EADDRNOTAVAIL; } +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) +{ + int err; + struct udp_media_addr addr = {0}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct udp_media_addr *dst; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) + return -EINVAL; + + if (!opts[TIPC_NLA_UDP_REMOTE]) + return -EINVAL; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); + if (err) + return err; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) { + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); + return -EINVAL; + } + + if (tipc_udp_is_known_peer(b, &addr)) + return 0; + + return tipc_udp_rcast_add(b, &addr); +} + /** * tipc_udp_enable - callback to create a new udp bearer instance * @net: network namespace @@ -340,18 +643,37 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, { int err = -EINVAL; struct udp_bearer *ub; - struct udp_media_addr *remote; + struct udp_media_addr remote = {0}; struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) return -ENOMEM; - remote = (struct udp_media_addr *)&b->bcast_addr.value; - memset(remote, 0, sizeof(struct udp_media_addr)); - err = parse_options(attrs, ub, &local, remote); + INIT_LIST_HEAD(&ub->rcast.list); + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + + if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, + &ub->ifindex); + if (err) + goto err; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); if (err) goto err; @@ -396,9 +718,18 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - err = enable_mcast(ub, remote); + /** + * The bcast media address port is used for all peers and the ip + * is used if it's a multicast address. + */ + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); + if (tipc_udp_is_mcast_addr(&remote)) + err = enable_mcast(ub, &remote); + else + err = tipc_udp_rcast_add(b, &remote); if (err) goto err; + return 0; err: if (ub->ubsock) @@ -411,6 +742,12 @@ err: static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + list_del_rcu(&rcast->list); + kfree_rcu(rcast, rcu); + } if (ub->ubsock) udp_tunnel_sock_release(ub->ubsock); diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h new file mode 100644 index 000000000000..281bbae87726 --- /dev/null +++ b/net/tipc/udp_media.h @@ -0,0 +1,46 @@ +/* + * net/tipc/udp_media.h: Include file for UDP bearer media + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef CONFIG_TIPC_MEDIA_UDP +#ifndef _TIPC_UDP_MEDIA_H +#define _TIPC_UDP_MEDIA_H + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); + +#endif +#endif diff --git a/net/wireless/core.c b/net/wireless/core.c index 7645e97362c0..2029b49a1df3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -906,6 +906,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); + list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -1079,6 +1081,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) dev->priv_flags |= IFF_DONT_BRIDGE; + + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); @@ -1157,6 +1161,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * remove and clean it up. 
*/ if (!list_empty(&wdev->list)) { + nl80211_notify_iface(rdev, wdev, + NL80211_CMD_DEL_INTERFACE); sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a08993..499785778983 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2751,7 +2751,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; - struct sk_buff *msg, *event; + struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2855,20 +2855,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } - event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (event) { - if (nl80211_send_iface(event, 0, 0, 0, - rdev, wdev, false) < 0) { - nlmsg_free(event); - goto out; - } - - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - event, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - } + /* + * For wdevs which have no associated netdev object (e.g. of type + * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here. + * For all other types, the event will be generated from the + * netdev notifier + */ + if (!wdev->netdev) + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); -out: return genlmsg_reply(msg, info); } @@ -2876,18 +2871,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct sk_buff *msg; - int status; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) { - nlmsg_free(msg); - msg = NULL; - } - /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it @@ -2898,15 +2885,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) if (!wdev->netdev) info->user_ptr[1] = NULL; - status = rdev_del_virtual_intf(rdev, wdev); - if (status >= 0 && msg) - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - msg, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - else - nlmsg_free(msg); - - return status; + return rdev_del_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) @@ -5374,6 +5353,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou return 0; } +static int nl80211_check_power_mode(const struct nlattr *nla, + enum nl80211_mesh_power_mode min, + enum nl80211_mesh_power_mode max, + enum nl80211_mesh_power_mode *out) +{ + u32 val = nla_get_u32(nla); + if (val < min || val > max) + return -EINVAL; + *out = val; + return 0; +} + static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) @@ -5518,7 +5509,7 @@ do { \ NL80211_MESH_POWER_ACTIVE, NL80211_MESH_POWER_MAX, mask, NL80211_MESHCONF_POWER_MODE, - nl80211_check_u32); + nl80211_check_power_mode); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16); @@ -7773,12 +7764,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = 100; - if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + if 
(info->attrs[NL80211_ATTR_BEACON_INTERVAL]) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000) - return -EINVAL; - } + + err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + if (err) + return err; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; @@ -9252,9 +9244,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (setup.beacon_interval < 10 || - setup.beacon_interval > 10000) - return -EINVAL; + + err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + if (err) + return err; } if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { @@ -11847,6 +11840,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, NL80211_MCGRP_CONFIG, GFP_KERNEL); } +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd) +{ + struct sk_buff *msg; + + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && + cmd != NL80211_CMD_DEL_INTERFACE); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, + cmd == NL80211_CMD_DEL_INTERFACE) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_CONFIG, GFP_KERNEL); +} + static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a63f402b10b7..7e3821d7fcc5 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,6 +7,9 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd); +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index b7d1592bd5b8..0675f513e7b9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1559,7 +1559,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int res = 0; - if (!beacon_int) + if (beacon_int < 10 || beacon_int > 10000) return -EINVAL; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { |
