summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/batman-adv/Kconfig15
-rw-r--r--net/batman-adv/Makefile4
-rw-r--r--net/batman-adv/bat_algo.c70
-rw-r--r--net/batman-adv/bat_algo.h3
-rw-r--r--net/batman-adv/bat_iv_ogm.c837
-rw-r--r--net/batman-adv/bat_v.c734
-rw-r--r--net/batman-adv/bat_v_ogm.c5
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c348
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h17
-rw-r--r--net/batman-adv/debugfs.c18
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c4
-rw-r--r--net/batman-adv/gateway_client.c285
-rw-r--r--net/batman-adv/gateway_client.h7
-rw-r--r--net/batman-adv/gateway_common.c5
-rw-r--r--net/batman-adv/hard-interface.c84
-rw-r--r--net/batman-adv/icmp_socket.h18
-rw-r--r--net/batman-adv/main.c19
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c4
-rw-r--r--net/batman-adv/netlink.c217
-rw-r--r--net/batman-adv/netlink.h6
-rw-r--r--net/batman-adv/network-coding.c11
-rw-r--r--net/batman-adv/originator.c172
-rw-r--r--net/batman-adv/originator.h4
-rw-r--r--net/batman-adv/packet.h36
-rw-r--r--net/batman-adv/routing.c43
-rw-r--r--net/batman-adv/send.c136
-rw-r--r--net/batman-adv/send.h6
-rw-r--r--net/batman-adv/soft-interface.c51
-rw-r--r--net/batman-adv/sysfs.c183
-rw-r--r--net/batman-adv/translation-table.c556
-rw-r--r--net/batman-adv/translation-table.h7
-rw-r--r--net/batman-adv/tvlv.c9
-rw-r--r--net/batman-adv/types.h69
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_if.c10
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_netlink.c120
-rw-r--r--net/bridge/br_private.h37
-rw-r--r--net/bridge/br_switchdev.c57
-rw-r--r--net/core/dev.c85
-rw-r--r--net/core/filter.c152
-rw-r--r--net/core/flow_dissector.c164
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net_namespace.c21
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/sock.c26
-rw-r--r--net/dsa/dsa.c80
-rw-r--r--net/dsa/dsa2.c18
-rw-r--r--net/dsa/slave.c146
-rw-r--r--net/ipv4/af_inet.c23
-rw-r--r--net/ipv4/fib_frontend.c7
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/igmp.c10
-rw-r--r--net/ipv4/inet_diag.c58
-rw-r--r--net/ipv4/ipconfig.c71
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/ipv4/tcp_input.c47
-rw-r--r--net/ipv4/tcp_ipv4.c35
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv4/udp.c17
-rw-r--r--net/ipv4/udp_diag.c79
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ila/ila_common.c1
-rw-r--r--net/ipv6/ip6_gre.c12
-rw-r--r--net/ipv6/ip6_tunnel.c10
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/mcast.c10
-rw-r--r--net/ipv6/sit.c12
-rw-r--r--net/ipv6/tcp_ipv6.c19
-rw-r--r--net/ipv6/udp.c13
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/irda/af_irda.c3
-rw-r--r--net/kcm/Kconfig1
-rw-r--r--net/kcm/kcmproc.c58
-rw-r--r--net/kcm/kcmsock.c483
-rw-r--r--net/l2tp/l2tp_ppp.c20
-rw-r--r--net/mac80211/driver-ops.h8
-rw-r--r--net/mac80211/mesh_hwmp.c27
-rw-r--r--net/mac80211/sta_info.c25
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/tx.c28
-rw-r--r--net/netlink/diag.c103
-rw-r--r--net/rds/ib.h1
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rxrpc/af_rxrpc.c1
-rw-r--r--net/rxrpc/ar-internal.h151
-rw-r--r--net/rxrpc/call_accept.c1
-rw-r--r--net/rxrpc/call_event.c31
-rw-r--r--net/rxrpc/call_object.c34
-rw-r--r--net/rxrpc/conn_client.c912
-rw-r--r--net/rxrpc/conn_event.c116
-rw-r--r--net/rxrpc/conn_object.c123
-rw-r--r--net/rxrpc/conn_service.c6
-rw-r--r--net/rxrpc/input.c68
-rw-r--r--net/rxrpc/local_event.c1
-rw-r--r--net/rxrpc/output.c11
-rw-r--r--net/rxrpc/proc.c39
-rw-r--r--net/rxrpc/recvmsg.c1
-rw-r--r--net/rxrpc/rxkad.c4
-rw-r--r--net/rxrpc/skbuff.c72
-rw-r--r--net/rxrpc/sysctl.c33
-rw-r--r--net/sched/act_vlan.c13
-rw-r--r--net/sched/cls_basic.c12
-rw-r--r--net/sched/cls_bpf.c27
-rw-r--r--net/sched/cls_cgroup.c13
-rw-r--r--net/sched/cls_flow.c26
-rw-r--r--net/sched/cls_flower.c110
-rw-r--r--net/sched/cls_fw.c18
-rw-r--r--net/sched/cls_route.c14
-rw-r--r--net/sched/cls_rsvp.h17
-rw-r--r--net/sched/cls_tcindex.c90
-rw-r--r--net/sched/cls_u32.c21
-rw-r--r--net/sched/sch_api.c45
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sched/sch_hfsc.c51
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/strparser/Kconfig4
-rw-r--r--net/strparser/Makefile1
-rw-r--r--net/strparser/strparser.c510
-rw-r--r--net/switchdev/switchdev.c87
-rw-r--r--net/sysctl_net.c29
-rw-r--r--net/tipc/bearer.c130
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/link.c18
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/netlink.c18
-rw-r--r--net/tipc/node.c63
-rw-r--r--net/tipc/node.h1
-rw-r--r--net/tipc/udp_media.c521
-rw-r--r--net/tipc/udp_media.h46
-rw-r--r--net/wireless/core.c6
-rw-r--r--net/wireless/nl80211.c94
-rw-r--r--net/wireless/nl80211.h3
-rw-r--r--net/wireless/util.c2
144 files changed, 7532 insertions, 2227 deletions
diff --git a/net/Kconfig b/net/Kconfig
index c2cdbce629bd..7b6cd340b72b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -369,6 +369,7 @@ source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
+source "net/strparser/Kconfig"
config FIB_RULES
bool
diff --git a/net/Makefile b/net/Makefile
index 9bd20bb86cc6..4cafaa2b4667 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_AF_RXRPC) += rxrpc/
obj-$(CONFIG_AF_KCM) += kcm/
+obj-$(CONFIG_STREAM_PARSER) += strparser/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_L2TP) += l2tp/
obj-$(CONFIG_DECNET) += decnet/
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 0e982222d425..3b3b1a292ec8 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1007,7 +1007,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
- if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+ if (strncmp(dev->name, "lec", 3))
return NOTIFY_DONE; /* we are only interested in lec:s */
switch (event) {
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 833bb145ba3c..f20742cbae6d 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -73,10 +73,21 @@ config BATMAN_ADV_MCAST
reduce the air overhead while improving the reliability of
multicast messages.
-config BATMAN_ADV_DEBUG
- bool "B.A.T.M.A.N. debugging"
+config BATMAN_ADV_DEBUGFS
+ bool "batman-adv debugfs entries"
depends on BATMAN_ADV
depends on DEBUG_FS
+ default y
+ help
+ Enable this to export routing related debug tables via debugfs.
+ The information for each soft-interface and used hard-interface can be
+ found under batman_adv/
+
+ If unsure, say Y.
+
+config BATMAN_ADV_DEBUG
+ bool "B.A.T.M.A.N. debugging"
+ depends on BATMAN_ADV_DEBUGFS
help
This is an option for use by developers; most people should
say N here. This enables compilation of support for
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index a83fc6c58d19..f724d3c98a81 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -24,14 +24,14 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o
batman-adv-y += bitarray.o
batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
-batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o
batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
batman-adv-y += fragmentation.o
batman-adv-y += gateway_client.o
batman-adv-y += gateway_common.o
batman-adv-y += hard-interface.o
batman-adv-y += hash.o
-batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o
batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 81dbbf569bd4..623d04302aa2 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -20,12 +20,18 @@
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
+#include "netlink.h"
char batadv_routing_algo[20] = "BATMAN_IV";
static struct hlist_head batadv_algo_list;
@@ -95,6 +101,7 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
return 0;
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
{
struct batadv_algo_ops *bat_algo_ops;
@@ -107,6 +114,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
return 0;
}
+#endif
static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
{
@@ -138,3 +146,65 @@ static struct kparam_string batadv_param_string_ra = {
module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
0644);
+
+/**
+ * batadv_algo_dump_entry - fill in information about one supported routing
+ * algorithm
+ * @msg: netlink message to be sent back
+ * @portid: Port to reply to
+ * @seq: Sequence number of message
+ * @bat_algo_ops: Algorithm to be dumped
+ *
+ * Return: Error number, or 0 on success
+ */
+static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_algo_ops *bat_algo_ops)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_algo_dump - fill in information about supported routing
+ * algorithms
+ * @msg: netlink message to be sent back
+ * @cb: Parameters to the netlink request
+ *
+ * Return: Length of reply message.
+ */
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_algo_ops *bat_algo_ops;
+ int skip = cb->args[0];
+ int i = 0;
+
+ hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+ if (i++ < skip)
+ continue;
+
+ if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+ bat_algo_ops)) {
+ i--;
+ break;
+ }
+ }
+
+ cb->args[0] = i;
+
+ return msg->len;
+}
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 860d773dd8fa..3b5b69cdd12b 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -22,7 +22,9 @@
#include <linux/types.h>
+struct netlink_callback;
struct seq_file;
+struct sk_buff;
extern char batadv_routing_algo[];
extern struct list_head batadv_hardif_list;
@@ -31,5 +33,6 @@ void batadv_algo_init(void);
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 19b0abd6c640..e2d18d0b1f06 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/pkt_sched.h>
#include <linux/printk.h>
#include <linux/random.h>
@@ -48,12 +49,17 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
#include "bitarray.h"
+#include "gateway_client.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
@@ -318,17 +324,18 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
if (!orig_node->bat_iv.bcast_own_sum)
goto free_orig_node;
+ kref_get(&orig_node->refcount);
hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
batadv_choose_orig, orig_node,
&orig_node->hash_entry);
if (hash_added != 0)
- goto free_orig_node;
+ goto free_orig_node_hash;
return orig_node;
-free_orig_node:
- /* free twice, as batadv_orig_node_new sets refcount to 2 */
+free_orig_node_hash:
batadv_orig_node_put(orig_node);
+free_orig_node:
batadv_orig_node_put(orig_node);
return NULL;
@@ -528,36 +535,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
{
struct net_device *soft_iface;
- struct batadv_priv *bat_priv;
- struct batadv_hard_iface *primary_if = NULL;
if (!forw_packet->if_incoming) {
pr_err("Error - can't forward packet: incoming iface not specified\n");
- goto out;
+ return;
}
soft_iface = forw_packet->if_incoming->soft_iface;
- bat_priv = netdev_priv(soft_iface);
if (WARN_ON(!forw_packet->if_outgoing))
- goto out;
+ return;
if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
- goto out;
+ return;
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
- goto out;
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
+ return;
/* only for one specific outgoing interface */
batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing);
-
-out:
- if (primary_if)
- batadv_hardif_put(primary_if);
}
/**
@@ -685,19 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
struct batadv_forw_packet *forw_packet_aggr;
unsigned char *skb_buff;
unsigned int skb_size;
+ atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
- /* own packet should always be scheduled */
- if (!own_packet) {
- if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "batman packet queue full\n");
- return;
- }
- }
-
- forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+ forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+ queue_left, bat_priv);
if (!forw_packet_aggr)
- goto out_nomem;
+ return;
if (atomic_read(&bat_priv->aggregated_ogms) &&
packet_len < BATADV_MAX_AGGREGATION_BYTES)
@@ -708,8 +697,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb_size += ETH_HLEN;
forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
- if (!forw_packet_aggr->skb)
- goto out_free_forw_packet;
+ if (!forw_packet_aggr->skb) {
+ batadv_forw_packet_free(forw_packet_aggr);
+ return;
+ }
+
forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
@@ -717,12 +709,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
forw_packet_aggr->packet_len = packet_len;
memcpy(skb_buff, packet_buff, packet_len);
- kref_get(&if_incoming->refcount);
- kref_get(&if_outgoing->refcount);
forw_packet_aggr->own = own_packet;
- forw_packet_aggr->if_incoming = if_incoming;
- forw_packet_aggr->if_outgoing = if_outgoing;
- forw_packet_aggr->num_packets = 0;
forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS;
forw_packet_aggr->send_time = send_time;
@@ -741,13 +728,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
queue_delayed_work(batadv_event_workqueue,
&forw_packet_aggr->delayed_work,
send_time - jiffies);
-
- return;
-out_free_forw_packet:
- kfree(forw_packet_aggr);
-out_nomem:
- if (!own_packet)
- atomic_inc(&bat_priv->batman_queue_left);
}
/* aggregate a new packet into the existing ogm packet */
@@ -1830,10 +1810,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
batadv_iv_ogm_schedule(forw_packet->if_incoming);
out:
- /* don't count own packet */
- if (!forw_packet->own)
- atomic_inc(&bat_priv->batman_queue_left);
-
batadv_forw_packet_free(forw_packet);
}
@@ -1879,6 +1855,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
return NET_RX_SUCCESS;
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
* @orig_node: the orig_node for which the neighbors are printed
@@ -1976,8 +1953,239 @@ next:
if (batman_count == 0)
seq_puts(seq, "No batman nodes in range ...\n");
}
+#endif
+
+/**
+ * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ * given outgoing interface.
+ * @neigh_node: Neighbour of interest
+ * @if_outgoing: Outgoing interface of interest
+ * @tq_avg: Pointer of where to store the TQ average
+ *
+ * Return: False if no average TQ available, otherwise true.
+ */
+static bool
+batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
+ struct batadv_hard_iface *if_outgoing,
+ u8 *tq_avg)
+{
+ struct batadv_neigh_ifinfo *n_ifinfo;
+
+ n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+ if (!n_ifinfo)
+ return false;
+
+ *tq_avg = n_ifinfo->bat_iv.tq_avg;
+ batadv_neigh_ifinfo_put(n_ifinfo);
+
+ return true;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ * message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node,
+ bool best)
+{
+ void *hdr;
+ u8 tq_avg;
+ unsigned int last_seen_msecs;
+
+ last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+ if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg))
+ return 0;
+
+ if (if_outgoing != BATADV_IF_DEFAULT &&
+ if_outgoing != neigh_node->if_incoming)
+ return 0;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig) ||
+ nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+ neigh_node->addr) ||
+ nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ neigh_node->if_incoming->net_dev->ifindex) ||
+ nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+ last_seen_msecs))
+ goto nla_put_failure;
+
+ if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct batadv_orig_node *orig_node, int *sub_s)
+{
+ struct batadv_neigh_node *neigh_node_best;
+ struct batadv_neigh_node *neigh_node;
+ int sub = 0;
+ bool best;
+ u8 tq_avg_best;
+
+ neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+ if (!neigh_node_best)
+ goto out;
+
+ if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing,
+ &tq_avg_best))
+ goto out;
+
+ if (tq_avg_best == 0)
+ goto out;
+
+ hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+ if (sub++ < *sub_s)
+ continue;
+
+ best = (neigh_node == neigh_node_best);
+
+ if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq,
+ bat_priv, if_outgoing,
+ orig_node, neigh_node,
+ best)) {
+ batadv_neigh_node_put(neigh_node_best);
+
+ *sub_s = sub - 1;
+ return -EMSGSIZE;
+ }
+ }
+
+ out:
+ if (neigh_node_best)
+ batadv_neigh_node_put(neigh_node_best);
+
+ *sub_s = 0;
+ return 0;
+}
/**
+ * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ * message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct hlist_head *head, int *idx_s, int *sub)
+{
+ struct batadv_orig_node *orig_node;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv,
+ if_outgoing, orig_node,
+ sub)) {
+ rcu_read_unlock();
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+ rcu_read_unlock();
+
+ *idx_s = 0;
+ *sub = 0;
+ return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_head *head;
+ int bucket = cb->args[0];
+ int idx = cb->args[1];
+ int sub = cb->args[2];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_iv_ogm_orig_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv, if_outgoing, head,
+ &idx, &sub))
+ break;
+
+ bucket++;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+ cb->args[2] = sub;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
* batadv_iv_hardif_neigh_print - print a single hop neighbour node
* @seq: neighbour table seq_file struct
* @hardif_neigh: hardif neighbour information
@@ -2027,37 +2235,43 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
if (batman_count == 0)
seq_puts(seq, "No batman nodes in range ...\n");
}
+#endif
/**
- * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
* @neigh1: the first neighbor object of the comparison
* @if_outgoing1: outgoing interface for the first neighbor
* @neigh2: the second neighbor object of the comparison
* @if_outgoing2: outgoing interface for the second neighbor
+ * @diff: pointer to integer receiving the calculated difference
*
- * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
- * lower, the same as or higher than the metric via neigh2
+ * The content of *@diff is only valid when this function returns true.
+ * It is less, equal to or greater than 0 if the metric via neigh1 is lower,
+ * the same as or higher than the metric via neigh2
+ *
+ * Return: true when the difference could be calculated, false otherwise
*/
-static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
- struct batadv_hard_iface *if_outgoing1,
- struct batadv_neigh_node *neigh2,
- struct batadv_hard_iface *if_outgoing2)
+static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2,
+ int *diff)
{
struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
u8 tq1, tq2;
- int diff;
+ bool ret = true;
neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
if (!neigh1_ifinfo || !neigh2_ifinfo) {
- diff = 0;
+ ret = false;
goto out;
}
tq1 = neigh1_ifinfo->bat_iv.tq_avg;
tq2 = neigh2_ifinfo->bat_iv.tq_avg;
- diff = tq1 - tq2;
+ *diff = (int)tq1 - (int)tq2;
out:
if (neigh1_ifinfo)
@@ -2065,6 +2279,162 @@ out:
if (neigh2_ifinfo)
batadv_neigh_ifinfo_put(neigh2_ifinfo);
+ return ret;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ void *hdr;
+ unsigned int last_seen_msecs;
+
+ last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+ hardif_neigh->addr) ||
+ nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ hardif_neigh->if_incoming->net_dev->ifindex) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+ last_seen_msecs))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ * into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: Hard interface to dump the neighbours for
+ * @idx_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface,
+ int *idx_s)
+{
+ struct batadv_hardif_neigh_node *hardif_neigh;
+ int idx = 0;
+
+ hlist_for_each_entry_rcu(hardif_neigh,
+ &hard_iface->neigh_list, list) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq,
+ hardif_neigh)) {
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+
+ *idx_s = 0;
+ return 0;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dump to this hard interfaace
+ */
+static void
+batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *single_hardif)
+{
+ struct batadv_hard_iface *hard_iface;
+ int i_hardif = 0;
+ int i_hardif_s = cb->args[0];
+ int idx = cb->args[1];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ rcu_read_lock();
+ if (single_hardif) {
+ if (i_hardif_s == 0) {
+ if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv,
+ single_hardif,
+ &idx) == 0)
+ i_hardif++;
+ }
+ } else {
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
+ list) {
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (i_hardif++ < i_hardif_s)
+ continue;
+
+ if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv,
+ hard_iface, &idx)) {
+ i_hardif--;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ cb->args[0] = i_hardif;
+ cb->args[1] = idx;
+}
+
+/**
+ * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ *
+ * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
+ * lower, the same as or higher than the metric via neigh2
+ */
+static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2)
+{
+ bool ret;
+ int diff;
+
+ ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+ if_outgoing2, &diff);
+ if (!ret)
+ return 0;
+
return diff;
}
@@ -2085,36 +2455,341 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2)
{
- struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
- u8 tq1, tq2;
bool ret;
+ int diff;
- neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
- neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+ ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+ if_outgoing2, &diff);
+ if (!ret)
+ return false;
- /* we can't say that the metric is better */
- if (!neigh1_ifinfo || !neigh2_ifinfo) {
- ret = false;
+ ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
+ return ret;
+}
+
+static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+ /* begin scheduling originator messages on that interface */
+ batadv_iv_ogm_schedule(hard_iface);
+}
+
+static struct batadv_gw_node *
+batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+ struct batadv_neigh_node *router;
+ struct batadv_neigh_ifinfo *router_ifinfo;
+ struct batadv_gw_node *gw_node, *curr_gw = NULL;
+ u64 max_gw_factor = 0;
+ u64 tmp_gw_factor = 0;
+ u8 max_tq = 0;
+ u8 tq_avg;
+ struct batadv_orig_node *orig_node;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ orig_node = gw_node->orig_node;
+ router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ continue;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router,
+ BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto next;
+
+ if (!kref_get_unless_zero(&gw_node->refcount))
+ goto next;
+
+ tq_avg = router_ifinfo->bat_iv.tq_avg;
+
+ switch (atomic_read(&bat_priv->gw.sel_class)) {
+ case 1: /* fast connection */
+ tmp_gw_factor = tq_avg * tq_avg;
+ tmp_gw_factor *= gw_node->bandwidth_down;
+ tmp_gw_factor *= 100 * 100;
+ tmp_gw_factor >>= 18;
+
+ if ((tmp_gw_factor > max_gw_factor) ||
+ ((tmp_gw_factor == max_gw_factor) &&
+ (tq_avg > max_tq))) {
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+ curr_gw = gw_node;
+ kref_get(&curr_gw->refcount);
+ }
+ break;
+
+ default: /* 2: stable connection (use best statistic)
+ * 3: fast-switch (use best statistic but change as
+ * soon as a better gateway appears)
+ * XX: late-switch (use best statistic but change as
+ * soon as a better gateway appears which has
+ * $routing_class more tq points)
+ */
+ if (tq_avg > max_tq) {
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+ curr_gw = gw_node;
+ kref_get(&curr_gw->refcount);
+ }
+ break;
+ }
+
+ if (tq_avg > max_tq)
+ max_tq = tq_avg;
+
+ if (tmp_gw_factor > max_gw_factor)
+ max_gw_factor = tmp_gw_factor;
+
+ batadv_gw_node_put(gw_node);
+
+next:
+ batadv_neigh_node_put(router);
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ }
+ rcu_read_unlock();
+
+ return curr_gw;
+}
+
+static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *curr_gw_orig,
+ struct batadv_orig_node *orig_node)
+{
+ struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL;
+ struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL;
+ struct batadv_neigh_node *router_gw = NULL;
+ struct batadv_neigh_node *router_orig = NULL;
+ u8 gw_tq_avg, orig_tq_avg;
+ bool ret = false;
+
+ /* dynamic re-election is performed only on fast or late switch */
+ if (atomic_read(&bat_priv->gw.sel_class) <= 2)
+ return false;
+
+ router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
+ if (!router_gw) {
+ ret = true;
goto out;
}
- tq1 = neigh1_ifinfo->bat_iv.tq_avg;
- tq2 = neigh2_ifinfo->bat_iv.tq_avg;
- ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD;
+ router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw,
+ BATADV_IF_DEFAULT);
+ if (!router_gw_ifinfo) {
+ ret = true;
+ goto out;
+ }
+
+ router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+ if (!router_orig)
+ goto out;
+
+ router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig,
+ BATADV_IF_DEFAULT);
+ if (!router_orig_ifinfo)
+ goto out;
+
+ gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg;
+ orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg;
+
+ /* the TQ value has to be better */
+ if (orig_tq_avg < gw_tq_avg)
+ goto out;
+ /* if the routing class is greater than 3 the value tells us how much
+ * greater the TQ value of the new gateway must be
+ */
+ if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+ (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
+ goto out;
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
+ gw_tq_avg, orig_tq_avg);
+
+ ret = true;
out:
- if (neigh1_ifinfo)
- batadv_neigh_ifinfo_put(neigh1_ifinfo);
- if (neigh2_ifinfo)
- batadv_neigh_ifinfo_put(neigh2_ifinfo);
+ if (router_gw_ifinfo)
+ batadv_neigh_ifinfo_put(router_gw_ifinfo);
+ if (router_orig_ifinfo)
+ batadv_neigh_ifinfo_put(router_orig_ifinfo);
+ if (router_gw)
+ batadv_neigh_node_put(router_gw);
+ if (router_orig)
+ batadv_neigh_node_put(router_orig);
return ret;
}
-static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/* fails if orig_node has no router */
+static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv,
+ struct seq_file *seq,
+ const struct batadv_gw_node *gw_node)
{
- /* begin scheduling originator messages on that interface */
- batadv_iv_ogm_schedule(hard_iface);
+ struct batadv_gw_node *curr_gw;
+ struct batadv_neigh_node *router;
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+ int ret = -1;
+
+ router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ goto out;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto out;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+ seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
+ (curr_gw == gw_node ? "=>" : " "),
+ gw_node->orig_node->orig,
+ router_ifinfo->bat_iv.tq_avg, router->addr,
+ router->if_incoming->net_dev->name,
+ gw_node->bandwidth_down / 10,
+ gw_node->bandwidth_down % 10,
+ gw_node->bandwidth_up / 10,
+ gw_node->bandwidth_up % 10);
+ ret = seq_has_overflowed(seq) ? -1 : 0;
+
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ if (router)
+ batadv_neigh_node_put(router);
+ return ret;
+}
+
+static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
+ struct seq_file *seq)
+{
+ struct batadv_gw_node *gw_node;
+ int gw_count = 0;
+
+ seq_puts(seq,
+ " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ /* fails if orig_node has no router */
+ if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+ continue;
+
+ gw_count++;
+ }
+ rcu_read_unlock();
+
+ if (gw_count == 0)
+ seq_puts(seq, "No gateways in range ...\n");
+}
+#endif
+
+/**
+ * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_gw_node *gw_node)
+{
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+ struct batadv_neigh_node *router;
+ struct batadv_gw_node *curr_gw;
+ int ret = -EINVAL;
+ void *hdr;
+
+ router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ goto out;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto out;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ ret = -EMSGSIZE;
+
+ if (curr_gw == gw_node)
+ if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ gw_node->orig_node->orig) ||
+ nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) ||
+ nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN,
+ router->addr) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ router->if_incoming->net_dev->name) ||
+ nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+ gw_node->bandwidth_down) ||
+ nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
+ gw_node->bandwidth_up)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ genlmsg_end(msg, hdr);
+ ret = 0;
+
+out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ if (router)
+ batadv_neigh_node_put(router);
+ return ret;
+}
+
+/**
+ * batadv_iv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv)
+{
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_gw_node *gw_node;
+ int idx_skip = cb->args[0];
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ if (idx++ < idx_skip)
+ continue;
+
+ if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, gw_node)) {
+ idx_skip = idx - 1;
+ goto unlock;
+ }
+ }
+
+ idx_skip = idx;
+unlock:
+ rcu_read_unlock();
+
+ cb->args[0] = idx_skip;
}
static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
@@ -2129,14 +2804,28 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.neigh = {
.cmp = batadv_iv_ogm_neigh_cmp,
.is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
.print = batadv_iv_neigh_print,
+#endif
+ .dump = batadv_iv_ogm_neigh_dump,
},
.orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
.print = batadv_iv_ogm_orig_print,
+#endif
+ .dump = batadv_iv_ogm_orig_dump,
.free = batadv_iv_ogm_orig_free,
.add_if = batadv_iv_ogm_orig_add_if,
.del_if = batadv_iv_ogm_orig_del_if,
},
+ .gw = {
+ .get_best_gw_node = batadv_iv_gw_get_best_gw_node,
+ .is_eligible = batadv_iv_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ .print = batadv_iv_gw_print,
+#endif
+ .dump = batadv_iv_gw_dump,
+ },
};
int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0366cbf5e444..e79f6f01182e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -21,24 +21,38 @@
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "packet.h"
+struct sk_buff;
+
static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -115,6 +129,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
batadv_v_elp_throughput_metric_update);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_v_orig_print_neigh - print neighbors for the originator table
* @orig_node: the orig_node for which the neighbors are printed
@@ -198,8 +213,142 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
if (batman_count == 0)
seq_puts(seq, "No batman nodes in range ...\n");
}
+#endif
+
+/**
+ * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to dump
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ void *hdr;
+ unsigned int last_seen_msecs;
+ u32 throughput;
+
+ last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+ throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
+ throughput = throughput * 100;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_NEIGHBORS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+ hardif_neigh->addr) ||
+ nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ hardif_neigh->if_incoming->net_dev->ifindex) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+ last_seen_msecs) ||
+ nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
/**
+ * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into
+ * a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: The hard interface to be dumped
+ * @idx_s: Entries to be skipped
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface,
+ int *idx_s)
+{
+ struct batadv_hardif_neigh_node *hardif_neigh;
+ int idx = 0;
+
+ hlist_for_each_entry_rcu(hardif_neigh,
+ &hard_iface->neigh_list, list) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) {
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+
+ *idx_s = 0;
+ return 0;
+}
+
+/**
+ * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a
+ * message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dumping to this hard interface
+ */
+static void
+batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *single_hardif)
+{
+ struct batadv_hard_iface *hard_iface;
+ int i_hardif = 0;
+ int i_hardif_s = cb->args[0];
+ int idx = cb->args[1];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ rcu_read_lock();
+ if (single_hardif) {
+ if (i_hardif_s == 0) {
+ if (batadv_v_neigh_dump_hardif(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv, single_hardif,
+ &idx) == 0)
+ i_hardif++;
+ }
+ } else {
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (i_hardif++ < i_hardif_s)
+ continue;
+
+ if (batadv_v_neigh_dump_hardif(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv, hard_iface,
+ &idx)) {
+ i_hardif--;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ cb->args[0] = i_hardif;
+ cb->args[1] = idx;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
* batadv_v_orig_print - print the originator table
* @bat_priv: the bat priv with all the soft interface information
* @seq: debugfs table seq_file struct
@@ -265,6 +414,205 @@ next:
if (batman_count == 0)
seq_puts(seq, "No batman nodes in range ...\n");
}
+#endif
+
+/**
+ * batadv_v_orig_dump_subentry - Dump an originator subentry into a
+ * message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node,
+ bool best)
+{
+ struct batadv_neigh_ifinfo *n_ifinfo;
+ unsigned int last_seen_msecs;
+ u32 throughput;
+ void *hdr;
+
+ n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+ if (!n_ifinfo)
+ return 0;
+
+ throughput = n_ifinfo->bat_v.throughput * 100;
+
+ batadv_neigh_ifinfo_put(n_ifinfo);
+
+ last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+ if (if_outgoing != BATADV_IF_DEFAULT &&
+ if_outgoing != neigh_node->if_incoming)
+ return 0;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_ORIGINATORS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
+ nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+ neigh_node->addr) ||
+ nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ neigh_node->if_incoming->net_dev->ifindex) ||
+ nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+ last_seen_msecs))
+ goto nla_put_failure;
+
+ if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct batadv_orig_node *orig_node, int *sub_s)
+{
+ struct batadv_neigh_node *neigh_node_best;
+ struct batadv_neigh_node *neigh_node;
+ int sub = 0;
+ bool best;
+
+ neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+ if (!neigh_node_best)
+ goto out;
+
+ hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+ if (sub++ < *sub_s)
+ continue;
+
+ best = (neigh_node == neigh_node_best);
+
+ if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv,
+ if_outgoing, orig_node,
+ neigh_node, best)) {
+ batadv_neigh_node_put(neigh_node_best);
+
+ *sub_s = sub - 1;
+ return -EMSGSIZE;
+ }
+ }
+
+ out:
+ if (neigh_node_best)
+ batadv_neigh_node_put(neigh_node_best);
+
+ *sub_s = 0;
+ return 0;
+}
+
+/**
+ * batadv_v_orig_dump_bucket - Dump an originator bucket into a
+ * message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing,
+ struct hlist_head *head, int *idx_s, int *sub)
+{
+ struct batadv_orig_node *orig_node;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv,
+ if_outgoing, orig_node, sub)) {
+ rcu_read_unlock();
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+ rcu_read_unlock();
+
+ *idx_s = 0;
+ *sub = 0;
+ return 0;
+}
+
+/**
+ * batadv_v_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_head *head;
+ int bucket = cb->args[0];
+ int idx = cb->args[1];
+ int sub = cb->args[2];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_v_orig_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq,
+ bat_priv, if_outgoing, head, &idx,
+ &sub))
+ break;
+
+ bucket++;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+ cb->args[2] = sub;
+}
static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing1,
@@ -320,6 +668,365 @@ err_ifinfo1:
return ret;
}
+static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
+ char *buff, size_t count)
+{
+ u32 old_class, class;
+
+ if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
+ "B.A.T.M.A.N. V GW selection class",
+ &class))
+ return -EINVAL;
+
+ old_class = atomic_read(&bat_priv->gw.sel_class);
+ atomic_set(&bat_priv->gw.sel_class, class);
+
+ if (old_class != class)
+ batadv_gw_reselect(bat_priv);
+
+ return count;
+}
+
+static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
+{
+ u32 class = atomic_read(&bat_priv->gw.sel_class);
+
+ return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10);
+}
+
+/**
+ * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * @gw_node: the GW to retrieve the metric for
+ * @bw: the pointer where the metric will be stored. The metric is computed as
+ * the minimum between the GW advertised throughput and the path throughput to
+ * it in the mesh
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
+{
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+ struct batadv_orig_node *orig_node;
+ struct batadv_neigh_node *router;
+ int ret = -1;
+
+ orig_node = gw_node->orig_node;
+ router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ goto out;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto out;
+
+ /* the GW metric is computed as the minimum between the path throughput
+ * to reach the GW itself and the advertised bandwidth.
+ * This gives us an approximation of the effective throughput that the
+ * client can expect via this particular GW node
+ */
+ *bw = router_ifinfo->bat_v.throughput;
+ *bw = min_t(u32, *bw, gw_node->bandwidth_down);
+
+ ret = 0;
+out:
+ if (router)
+ batadv_neigh_node_put(router);
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+
+ return ret;
+}
+
+/**
+ * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: the GW node having the best GW-metric, NULL if no GW is known
+ */
+static struct batadv_gw_node *
+batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+ struct batadv_gw_node *gw_node, *curr_gw = NULL;
+ u32 max_bw = 0, bw;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ if (!kref_get_unless_zero(&gw_node->refcount))
+ continue;
+
+ if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
+ goto next;
+
+ if (curr_gw && (bw <= max_bw))
+ goto next;
+
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+
+ curr_gw = gw_node;
+ kref_get(&curr_gw->refcount);
+ max_bw = bw;
+
+next:
+ batadv_gw_node_put(gw_node);
+ }
+ rcu_read_unlock();
+
+ return curr_gw;
+}
+
+/**
+ * batadv_v_gw_is_eligible - check if a originator would be selected as GW
+ * @bat_priv: the bat priv with all the soft interface information
+ * @curr_gw_orig: originator representing the currently selected GW
+ * @orig_node: the originator representing the new candidate
+ *
+ * Return: true if orig_node can be selected as current GW, false otherwise
+ */
+static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *curr_gw_orig,
+ struct batadv_orig_node *orig_node)
+{
+ struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+ u32 gw_throughput, orig_throughput, threshold;
+ bool ret = false;
+
+ threshold = atomic_read(&bat_priv->gw.sel_class);
+
+ curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig);
+ if (!curr_gw) {
+ ret = true;
+ goto out;
+ }
+
+ if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) {
+ ret = true;
+ goto out;
+ }
+
+ orig_gw = batadv_gw_node_get(bat_priv, orig_node);
+ if (!orig_node)
+ goto out;
+
+ if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
+ goto out;
+
+ if (orig_throughput < gw_throughput)
+ goto out;
+
+ if ((orig_throughput - gw_throughput) < threshold)
+ goto out;
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n",
+ gw_throughput, orig_throughput);
+
+ ret = true;
+out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+ if (orig_gw)
+ batadv_gw_node_put(orig_gw);
+
+ return ret;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/* fails if orig_node has no router */
+static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv,
+ struct seq_file *seq,
+ const struct batadv_gw_node *gw_node)
+{
+ struct batadv_gw_node *curr_gw;
+ struct batadv_neigh_node *router;
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+ int ret = -1;
+
+ router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ goto out;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto out;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+ seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n",
+ (curr_gw == gw_node ? "=>" : " "),
+ gw_node->orig_node->orig,
+ router_ifinfo->bat_v.throughput / 10,
+ router_ifinfo->bat_v.throughput % 10, router->addr,
+ router->if_incoming->net_dev->name,
+ gw_node->bandwidth_down / 10,
+ gw_node->bandwidth_down % 10,
+ gw_node->bandwidth_up / 10,
+ gw_node->bandwidth_up % 10);
+ ret = seq_has_overflowed(seq) ? -1 : 0;
+
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
+out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ if (router)
+ batadv_neigh_node_put(router);
+ return ret;
+}
+
+/**
+ * batadv_v_gw_print - print the gateway list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: gateway table seq_file struct
+ */
+static void batadv_v_gw_print(struct batadv_priv *bat_priv,
+ struct seq_file *seq)
+{
+ struct batadv_gw_node *gw_node;
+ int gw_count = 0;
+
+ seq_puts(seq,
+ " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ /* fails if orig_node has no router */
+ if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+ continue;
+
+ gw_count++;
+ }
+ rcu_read_unlock();
+
+ if (gw_count == 0)
+ seq_puts(seq, "No gateways in range ...\n");
+}
+#endif
+
+/**
+ * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_gw_node *gw_node)
+{
+ struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+ struct batadv_neigh_node *router;
+ struct batadv_gw_node *curr_gw;
+ int ret = -EINVAL;
+ void *hdr;
+
+ router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+ if (!router)
+ goto out;
+
+ router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+ if (!router_ifinfo)
+ goto out;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ ret = -EMSGSIZE;
+
+ if (curr_gw == gw_node) {
+ if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+ }
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ gw_node->orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT,
+ router_ifinfo->bat_v.throughput)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ router->if_incoming->net_dev->name)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+ gw_node->bandwidth_down)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ genlmsg_end(msg, hdr);
+ ret = 0;
+
+out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ if (router)
+ batadv_neigh_node_put(router);
+ return ret;
+}
+
+/**
+ * batadv_v_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *bat_priv)
+{
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_gw_node *gw_node;
+ int idx_skip = cb->args[0];
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+ if (idx++ < idx_skip)
+ continue;
+
+ if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, gw_node)) {
+ idx_skip = idx - 1;
+ goto unlock;
+ }
+ }
+
+ idx_skip = idx;
+unlock:
+ rcu_read_unlock();
+
+ cb->args[0] = idx_skip;
+}
+
static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.name = "BATMAN_V",
.iface = {
@@ -333,10 +1040,26 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.hardif_init = batadv_v_hardif_neigh_init,
.cmp = batadv_v_neigh_cmp,
.is_similar_or_better = batadv_v_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
.print = batadv_v_neigh_print,
+#endif
+ .dump = batadv_v_neigh_dump,
},
.orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
.print = batadv_v_orig_print,
+#endif
+ .dump = batadv_v_orig_dump,
+ },
+ .gw = {
+ .store_sel_class = batadv_v_store_sel_class,
+ .show_sel_class = batadv_v_show_sel_class,
+ .get_best_gw_node = batadv_v_gw_get_best_gw_node,
+ .is_eligible = batadv_v_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ .print = batadv_v_gw_print,
+#endif
+ .dump = batadv_v_gw_dump,
},
};
@@ -363,7 +1086,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
*/
int batadv_v_mesh_init(struct batadv_priv *bat_priv)
{
- return batadv_v_ogm_init(bat_priv);
+ int ret = 0;
+
+ ret = batadv_v_ogm_init(bat_priv);
+ if (ret < 0)
+ return ret;
+
+ /* set default throughput difference threshold to 5Mbps */
+ atomic_set(&bat_priv->gw.sel_class, 50);
+
+ return 0;
}
/**
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 6fbba4eb0617..1aeeadca620c 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -73,13 +73,12 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
if (!orig_node)
return NULL;
+ kref_get(&orig_node->refcount);
hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
batadv_choose_orig, orig_node,
&orig_node->hash_entry);
if (hash_added != 0) {
- /* orig_node->refcounter is initialised to 2 by
- * batadv_orig_node_new()
- */
+ /* remove refcnt for newly created orig_node and hash entry */
batadv_orig_node_put(orig_node);
batadv_orig_node_put(orig_node);
orig_node = NULL;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ad2ffe16d29f..e7f690b571ea 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -45,12 +46,18 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "packet.h"
+#include "soft-interface.h"
#include "sysfs.h"
#include "translation-table.h"
@@ -519,11 +526,9 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
atomic_set(&entry->wait_periods, 0);
ether_addr_copy(entry->orig, orig);
INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
-
- /* one for the hash, one for returning */
kref_init(&entry->refcount);
- kref_get(&entry->refcount);
+ kref_get(&entry->refcount);
hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
batadv_compare_backbone_gw,
batadv_choose_backbone_gw, entry,
@@ -711,12 +716,13 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
claim->lasttime = jiffies;
kref_get(&backbone_gw->refcount);
claim->backbone_gw = backbone_gw;
-
kref_init(&claim->refcount);
- kref_get(&claim->refcount);
+
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
mac, BATADV_PRINT_VID(vid));
+
+ kref_get(&claim->refcount);
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
batadv_compare_claim,
batadv_choose_claim, claim,
@@ -1148,7 +1154,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
/* Let the loopdetect frames on the mesh in any case. */
if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
- return 0;
+ return false;
/* check if it is a claim frame. */
ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -1990,6 +1996,7 @@ out:
return ret;
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
* @seq: seq file to print on
@@ -2050,8 +2057,172 @@ out:
batadv_hardif_put(primary_if);
return 0;
}
+#endif
+
+/**
+ * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @claim: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hard_iface *primary_if,
+ struct batadv_bla_claim *claim)
+{
+ u8 *primary_addr = primary_if->net_dev->dev_addr;
+ u16 backbone_crc;
+ bool is_own;
+ void *hdr;
+ int ret = -EINVAL;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ is_own = batadv_compare_eth(claim->backbone_gw->orig,
+ primary_addr);
+
+ spin_lock_bh(&claim->backbone_gw->crc_lock);
+ backbone_crc = claim->backbone_gw->crc;
+ spin_unlock_bh(&claim->backbone_gw->crc_lock);
+
+ if (is_own)
+ if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) ||
+ nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) ||
+ nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+ claim->backbone_gw->orig) ||
+ nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+ backbone_crc)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ genlmsg_end(msg, hdr);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hard_iface *primary_if,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_bla_claim *claim;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ if (idx++ < *idx_skip)
+ continue;
+ if (batadv_bla_claim_dump_entry(msg, portid, seq,
+ primary_if, claim)) {
+ *idx_skip = idx - 1;
+ goto unlock;
+ }
+ }
+
+ *idx_skip = idx;
+unlock:
+ rcu_read_unlock();
+ return 0;
+}
/**
+ * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->bla.claim_hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_bla_claim_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq,
+ primary_if, head, &idx))
+ break;
+ bucket++;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
* batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
* file
* @seq: seq file to print on
@@ -2114,3 +2285,168 @@ out:
batadv_hardif_put(primary_if);
return 0;
}
+#endif
+
+/**
+ * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @backbone_gw: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hard_iface *primary_if,
+ struct batadv_bla_backbone_gw *backbone_gw)
+{
+ u8 *primary_addr = primary_if->net_dev->dev_addr;
+ u16 backbone_crc;
+ bool is_own;
+ int msecs;
+ void *hdr;
+ int ret = -EINVAL;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
+
+ spin_lock_bh(&backbone_gw->crc_lock);
+ backbone_crc = backbone_gw->crc;
+ spin_unlock_bh(&backbone_gw->crc_lock);
+
+ msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime);
+
+ if (is_own)
+ if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+ backbone_gw->orig) ||
+ nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) ||
+ nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+ backbone_crc) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
+ genlmsg_end(msg, hdr);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hard_iface *primary_if,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_bla_backbone_gw *backbone_gw;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+ if (idx++ < *idx_skip)
+ continue;
+ if (batadv_bla_backbone_dump_entry(msg, portid, seq,
+ primary_if, backbone_gw)) {
+ *idx_skip = idx - 1;
+ goto unlock;
+ }
+ }
+
+ *idx_skip = idx;
+unlock:
+ rcu_read_unlock();
+ return 0;
+}
+
+/**
+ * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->bla.backbone_hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_bla_backbone_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq,
+ primary_if, head, &idx))
+ break;
+ bucket++;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 0f01daeb359e..1ae93e46fb98 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
struct net_device;
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -35,8 +36,10 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
int hdr_size);
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
void *offset);
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb);
bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid);
bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
@@ -47,7 +50,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
-
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
#define BATADV_BLA_CRC_INIT 0
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
@@ -112,6 +115,18 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv)
{
}
+static inline int batadv_bla_claim_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* ifdef CONFIG_BATMAN_ADV_BLA */
#endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 1d68b6e63b96..b4ffba7dd583 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -31,6 +31,7 @@
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/sysfs.h>
+#include <net/net_namespace.h>
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
@@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void)
*/
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
{
+ struct net *net = dev_net(hard_iface->net_dev);
struct batadv_debuginfo **bat_debug;
struct dentry *file;
if (!batadv_debugfs)
goto out;
+ if (net != &init_net)
+ return 0;
+
hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
batadv_debugfs);
if (!hard_iface->debug_dir)
@@ -341,6 +346,11 @@ out:
*/
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
{
+ struct net *net = dev_net(hard_iface->net_dev);
+
+ if (net != &init_net)
+ return;
+
if (batadv_debugfs) {
debugfs_remove_recursive(hard_iface->debug_dir);
hard_iface->debug_dir = NULL;
@@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
struct batadv_debuginfo **bat_debug;
+ struct net *net = dev_net(dev);
struct dentry *file;
if (!batadv_debugfs)
goto out;
+ if (net != &init_net)
+ return 0;
+
bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
if (!bat_priv->debug_dir)
goto out;
@@ -392,6 +406,10 @@ out:
void batadv_debugfs_del_meshif(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+
+ if (net != &init_net)
+ return;
batadv_debug_log_cleanup(bat_priv);
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 1ab4e2e63afc..c68ff3dcb926 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -26,7 +26,7 @@ struct net_device;
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
-#if IS_ENABLED(CONFIG_DEBUG_FS)
+#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS)
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b1cc8bfe11ac..e257efdc5d03 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -343,8 +343,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
ether_addr_copy(dat_entry->mac_addr, mac_addr);
dat_entry->last_update = jiffies;
kref_init(&dat_entry->refcount);
- kref_get(&dat_entry->refcount);
+ kref_get(&dat_entry->refcount);
hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
batadv_hash_dat, dat_entry,
&dat_entry->hash_entry);
@@ -795,6 +795,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
batadv_dat_hash_free(bat_priv);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_dat_cache_seq_print_text - print the local DAT hash table
* @seq: seq file to print on
@@ -846,6 +847,7 @@ out:
batadv_hardif_put(primary_if);
return 0;
}
+#endif
/**
* batadv_arp_get_type - parse an ARP packet and gets the type
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 63a805d3f96e..de055d64debe 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/if_ether.h>
@@ -31,6 +32,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -39,13 +41,17 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/udp.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "gateway_common.h"
#include "hard-interface.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
+#include "soft-interface.h"
#include "sysfs.h"
#include "translation-table.h"
@@ -80,12 +86,12 @@ static void batadv_gw_node_release(struct kref *ref)
* batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
* @gw_node: gateway node to free
*/
-static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+void batadv_gw_node_put(struct batadv_gw_node *gw_node)
{
kref_put(&gw_node->refcount, batadv_gw_node_release);
}
-static struct batadv_gw_node *
+struct batadv_gw_node *
batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *gw_node;
@@ -164,86 +170,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.reselect, 1);
}
-static struct batadv_gw_node *
-batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
-{
- struct batadv_neigh_node *router;
- struct batadv_neigh_ifinfo *router_ifinfo;
- struct batadv_gw_node *gw_node, *curr_gw = NULL;
- u64 max_gw_factor = 0;
- u64 tmp_gw_factor = 0;
- u8 max_tq = 0;
- u8 tq_avg;
- struct batadv_orig_node *orig_node;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
- orig_node = gw_node->orig_node;
- router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
- if (!router)
- continue;
-
- router_ifinfo = batadv_neigh_ifinfo_get(router,
- BATADV_IF_DEFAULT);
- if (!router_ifinfo)
- goto next;
-
- if (!kref_get_unless_zero(&gw_node->refcount))
- goto next;
-
- tq_avg = router_ifinfo->bat_iv.tq_avg;
-
- switch (atomic_read(&bat_priv->gw.sel_class)) {
- case 1: /* fast connection */
- tmp_gw_factor = tq_avg * tq_avg;
- tmp_gw_factor *= gw_node->bandwidth_down;
- tmp_gw_factor *= 100 * 100;
- tmp_gw_factor >>= 18;
-
- if ((tmp_gw_factor > max_gw_factor) ||
- ((tmp_gw_factor == max_gw_factor) &&
- (tq_avg > max_tq))) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- curr_gw = gw_node;
- kref_get(&curr_gw->refcount);
- }
- break;
-
- default: /* 2: stable connection (use best statistic)
- * 3: fast-switch (use best statistic but change as
- * soon as a better gateway appears)
- * XX: late-switch (use best statistic but change as
- * soon as a better gateway appears which has
- * $routing_class more tq points)
- */
- if (tq_avg > max_tq) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- curr_gw = gw_node;
- kref_get(&curr_gw->refcount);
- }
- break;
- }
-
- if (tq_avg > max_tq)
- max_tq = tq_avg;
-
- if (tmp_gw_factor > max_gw_factor)
- max_gw_factor = tmp_gw_factor;
-
- batadv_gw_node_put(gw_node);
-
-next:
- batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- }
- rcu_read_unlock();
-
- return curr_gw;
-}
-
/**
* batadv_gw_check_client_stop - check if client mode has been switched off
* @bat_priv: the bat priv with all the soft interface information
@@ -287,12 +213,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
goto out;
+ if (!bat_priv->algo_ops->gw.get_best_gw_node)
+ goto out;
+
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw)
goto out;
- next_gw = batadv_gw_get_best_gw_node(bat_priv);
+ /* if gw.reselect is set to 1 it means that a previous call to
+ * gw.is_eligible() said that we have a new best GW, therefore it can
+ * now be picked from the list and selected
+ */
+ next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv);
if (curr_gw == next_gw)
goto out;
@@ -360,70 +293,31 @@ out:
void batadv_gw_check_election(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
- struct batadv_neigh_ifinfo *router_orig_tq = NULL;
- struct batadv_neigh_ifinfo *router_gw_tq = NULL;
struct batadv_orig_node *curr_gw_orig;
- struct batadv_neigh_node *router_gw = NULL;
- struct batadv_neigh_node *router_orig = NULL;
- u8 gw_tq_avg, orig_tq_avg;
+
+ /* abort immediately if the routing algorithm does not support gateway
+ * election
+ */
+ if (!bat_priv->algo_ops->gw.is_eligible)
+ return;
curr_gw_orig = batadv_gw_get_selected_orig(bat_priv);
if (!curr_gw_orig)
goto reselect;
- router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
- if (!router_gw)
- goto reselect;
-
- router_gw_tq = batadv_neigh_ifinfo_get(router_gw,
- BATADV_IF_DEFAULT);
- if (!router_gw_tq)
- goto reselect;
-
/* this node already is the gateway */
if (curr_gw_orig == orig_node)
goto out;
- router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
- if (!router_orig)
+ if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig,
+ orig_node))
goto out;
- router_orig_tq = batadv_neigh_ifinfo_get(router_orig,
- BATADV_IF_DEFAULT);
- if (!router_orig_tq)
- goto out;
-
- gw_tq_avg = router_gw_tq->bat_iv.tq_avg;
- orig_tq_avg = router_orig_tq->bat_iv.tq_avg;
-
- /* the TQ value has to be better */
- if (orig_tq_avg < gw_tq_avg)
- goto out;
-
- /* if the routing class is greater than 3 the value tells us how much
- * greater the TQ value of the new gateway must be
- */
- if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
- (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
- goto out;
-
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
- gw_tq_avg, orig_tq_avg);
-
reselect:
batadv_gw_reselect(bat_priv);
out:
if (curr_gw_orig)
batadv_orig_node_put(curr_gw_orig);
- if (router_gw)
- batadv_neigh_node_put(router_gw);
- if (router_orig)
- batadv_neigh_node_put(router_orig);
- if (router_gw_tq)
- batadv_neigh_ifinfo_put(router_gw_tq);
- if (router_orig_tq)
- batadv_neigh_ifinfo_put(router_orig_tq);
}
/**
@@ -445,14 +339,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
if (!gw_node)
return;
- kref_get(&orig_node->refcount);
+ kref_init(&gw_node->refcount);
INIT_HLIST_NODE(&gw_node->list);
+ kref_get(&orig_node->refcount);
gw_node->orig_node = orig_node;
gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
- kref_init(&gw_node->refcount);
spin_lock_bh(&bat_priv->gw.list_lock);
+ kref_get(&gw_node->refcount);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
spin_unlock_bh(&bat_priv->gw.list_lock);
@@ -463,6 +358,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
ntohl(gateway->bandwidth_down) % 10,
ntohl(gateway->bandwidth_up) / 10,
ntohl(gateway->bandwidth_up) % 10);
+
+ /* don't return reference to new gw_node */
+ batadv_gw_node_put(gw_node);
}
/**
@@ -472,9 +370,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
*
* Return: gateway node if found or NULL otherwise.
*/
-static struct batadv_gw_node *
-batadv_gw_node_get(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node)
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node)
{
struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
@@ -585,81 +482,87 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
spin_unlock_bh(&bat_priv->gw.list_lock);
}
-/* fails if orig_node has no router */
-static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
- struct seq_file *seq,
- const struct batadv_gw_node *gw_node)
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
{
- struct batadv_gw_node *curr_gw;
- struct batadv_neigh_node *router;
- struct batadv_neigh_ifinfo *router_ifinfo = NULL;
- int ret = -1;
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hard_iface *primary_if;
- router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
- if (!router)
- goto out;
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
+ return 0;
- router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
- if (!router_ifinfo)
- goto out;
+ seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
+ BATADV_SOURCE_VERSION, primary_if->net_dev->name,
+ primary_if->net_dev->dev_addr, net_dev->name,
+ bat_priv->algo_ops->name);
- curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ batadv_hardif_put(primary_if);
- seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
- (curr_gw == gw_node ? "=>" : " "),
- gw_node->orig_node->orig,
- router_ifinfo->bat_iv.tq_avg, router->addr,
- router->if_incoming->net_dev->name,
- gw_node->bandwidth_down / 10,
- gw_node->bandwidth_down % 10,
- gw_node->bandwidth_up / 10,
- gw_node->bandwidth_up % 10);
- ret = seq_has_overflowed(seq) ? -1 : 0;
+ if (!bat_priv->algo_ops->gw.print) {
+ seq_puts(seq,
+ "No printing function for this routing protocol\n");
+ return 0;
+ }
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
-out:
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
- return ret;
+ bat_priv->algo_ops->gw.print(bat_priv, seq);
+
+ return 0;
}
+#endif
-int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
+/**
+ * batadv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ *
+ * Return: Error code, or length of message
+ */
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
{
- struct net_device *net_dev = (struct net_device *)seq->private;
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hard_iface *primary_if;
- struct batadv_gw_node *gw_node;
- int gw_count = 0;
-
- primary_if = batadv_seq_print_text_primary_if_get(seq);
- if (!primary_if)
+ struct batadv_hard_iface *primary_if = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
goto out;
+ }
- seq_printf(seq,
- " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
- BATADV_SOURCE_VERSION, primary_if->net_dev->name,
- primary_if->net_dev->dev_addr, net_dev->name);
+ bat_priv = netdev_priv(soft_iface);
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
- /* fails if orig_node has no router */
- if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0)
- continue;
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
- gw_count++;
+ if (!bat_priv->algo_ops->gw.dump) {
+ ret = -EOPNOTSUPP;
+ goto out;
}
- rcu_read_unlock();
- if (gw_count == 0)
- seq_puts(seq, "No gateways in range ...\n");
+ bat_priv->algo_ops->gw.dump(msg, cb, bat_priv);
+
+ ret = msg->len;
out:
if (primary_if)
batadv_hardif_put(primary_if);
- return 0;
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
}
/**
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 582dd8c413c8..859166d03561 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
struct batadv_tvlv_gateway_data;
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -39,10 +40,16 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_free(struct batadv_priv *bat_priv);
+void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+struct batadv_gw_node *
+batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
enum batadv_dhcp_recipient
batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
u8 *chaddr);
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node);
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index d7bc6a87bcc9..21184810d89f 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig, &gateway);
- /* restart gateway selection if fast or late switching was enabled */
+ /* restart gateway selection */
if ((gateway.bandwidth_down != 0) &&
- (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
- (atomic_read(&bat_priv->gw.sel_class) > 2))
+ (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
batadv_gw_check_election(bat_priv, orig);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 1f9080840566..08ce36147c4c 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -35,7 +35,8 @@
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
#include "bat_v.h"
#include "bridge_loop_avoidance.h"
@@ -85,25 +86,55 @@ out:
}
/**
+ * batadv_getlink_net - return link net namespace (of use fallback)
+ * @netdev: net_device to check
+ * @fallback_net: return in case get_link_net is not available for @netdev
+ *
+ * Return: result of rtnl_link_ops->get_link_net or @fallback_net
+ */
+static const struct net *batadv_getlink_net(const struct net_device *netdev,
+ const struct net *fallback_net)
+{
+ if (!netdev->rtnl_link_ops)
+ return fallback_net;
+
+ if (!netdev->rtnl_link_ops->get_link_net)
+ return fallback_net;
+
+ return netdev->rtnl_link_ops->get_link_net(netdev);
+}
+
+/**
* batadv_mutual_parents - check if two devices are each others parent
- * @dev1: 1st net_device
- * @dev2: 2nd net_device
+ * @dev1: 1st net dev
+ * @net1: 1st devices netns
+ * @dev2: 2nd net dev
+ * @net2: 2nd devices netns
*
* veth devices come in pairs and each is the parent of the other!
*
* Return: true if the devices are each others parent, otherwise false
*/
static bool batadv_mutual_parents(const struct net_device *dev1,
- const struct net_device *dev2)
+ const struct net *net1,
+ const struct net_device *dev2,
+ const struct net *net2)
{
int dev1_parent_iflink = dev_get_iflink(dev1);
int dev2_parent_iflink = dev_get_iflink(dev2);
+ const struct net *dev1_parent_net;
+ const struct net *dev2_parent_net;
+
+ dev1_parent_net = batadv_getlink_net(dev1, net1);
+ dev2_parent_net = batadv_getlink_net(dev2, net2);
if (!dev1_parent_iflink || !dev2_parent_iflink)
return false;
return (dev1_parent_iflink == dev2->ifindex) &&
- (dev2_parent_iflink == dev1->ifindex);
+ (dev2_parent_iflink == dev1->ifindex) &&
+ net_eq(dev1_parent_net, net2) &&
+ net_eq(dev2_parent_net, net1);
}
/**
@@ -121,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
*/
static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
{
- struct net_device *parent_dev;
struct net *net = dev_net(net_dev);
+ struct net_device *parent_dev;
+ const struct net *parent_net;
bool ret;
/* check if this is a batman-adv mesh interface */
@@ -134,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
dev_get_iflink(net_dev) == net_dev->ifindex)
return false;
+ parent_net = batadv_getlink_net(net_dev, net);
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
+ parent_dev = __dev_get_by_index((struct net *)parent_net,
+ dev_get_iflink(net_dev));
/* if we got a NULL parent_dev there is something broken.. */
if (WARN(!parent_dev, "Cannot find parent device"))
return false;
- if (batadv_mutual_parents(net_dev, parent_dev))
+ if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
return false;
ret = batadv_is_on_batman_iface(parent_dev);
@@ -625,25 +660,6 @@ out:
batadv_hardif_put(primary_if);
}
-/**
- * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif
- * @work: work queue item
- *
- * Free the parts of the hard interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_hardif_remove_interface_finish(struct work_struct *work)
-{
- struct batadv_hard_iface *hard_iface;
-
- hard_iface = container_of(work, struct batadv_hard_iface,
- cleanup_work);
-
- batadv_debugfs_del_hardif(hard_iface);
- batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
- batadv_hardif_put(hard_iface);
-}
-
static struct batadv_hard_iface *
batadv_hardif_add_interface(struct net_device *net_dev)
{
@@ -676,10 +692,9 @@ batadv_hardif_add_interface(struct net_device *net_dev)
INIT_LIST_HEAD(&hard_iface->list);
INIT_HLIST_HEAD(&hard_iface->neigh_list);
- INIT_WORK(&hard_iface->cleanup_work,
- batadv_hardif_remove_interface_finish);
spin_lock_init(&hard_iface->neigh_list_lock);
+ kref_init(&hard_iface->refcount);
hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
if (batadv_is_wifi_netdev(net_dev))
@@ -687,11 +702,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
batadv_v_hardif_init(hard_iface);
- /* extra reference for return */
- kref_init(&hard_iface->refcount);
- kref_get(&hard_iface->refcount);
-
batadv_check_known_mac_addr(hard_iface->net_dev);
+ kref_get(&hard_iface->refcount);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
return hard_iface;
@@ -713,13 +725,15 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
/* first deactivate interface */
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
batadv_hardif_disable_interface(hard_iface,
- BATADV_IF_CLEANUP_AUTO);
+ BATADV_IF_CLEANUP_KEEP);
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
return;
hard_iface->if_status = BATADV_IF_TO_BE_REMOVED;
- queue_work(batadv_event_workqueue, &hard_iface->cleanup_work);
+ batadv_debugfs_del_hardif(hard_iface);
+ batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
+ batadv_hardif_put(hard_iface);
}
void batadv_hardif_remove_interfaces(void)
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 618d5de06f20..e44a7da51431 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -26,9 +26,25 @@ struct batadv_icmp_header;
#define BATADV_ICMP_SOCKET "socket"
-void batadv_socket_init(void);
int batadv_socket_setup(struct batadv_priv *bat_priv);
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+void batadv_socket_init(void);
void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
size_t icmp_len);
+#else
+
+static inline void batadv_socket_init(void)
+{
+}
+
+static inline void
+batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len)
+{
+}
+
+#endif
+
#endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fe4c5e29f96b..2c017ab47557 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void);
static int __init batadv_init(void)
{
+ int ret;
+
+ ret = batadv_tt_cache_init();
+ if (ret < 0)
+ return ret;
+
INIT_LIST_HEAD(&batadv_hardif_list);
batadv_algo_init();
@@ -93,9 +99,8 @@ static int __init batadv_init(void)
batadv_tp_meter_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
-
if (!batadv_event_workqueue)
- return -ENOMEM;
+ goto err_create_wq;
batadv_socket_init();
batadv_debugfs_init();
@@ -108,6 +113,11 @@ static int __init batadv_init(void)
BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
return 0;
+
+err_create_wq:
+ batadv_tt_cache_destroy();
+
+ return -ENOMEM;
}
static void __exit batadv_exit(void)
@@ -123,6 +133,8 @@ static void __exit batadv_exit(void)
batadv_event_workqueue = NULL;
rcu_barrier();
+
+ batadv_tt_cache_destroy();
}
int batadv_mesh_init(struct net_device *soft_iface)
@@ -270,6 +282,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
return is_my_mac;
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_seq_print_text_primary_if_get - called from debugfs table printing
* function that requires the primary interface
@@ -305,6 +318,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq)
out:
return primary_if;
}
+#endif
/**
* batadv_max_header_len - calculate maximum encapsulation overhead for a
@@ -638,3 +652,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR);
MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
MODULE_VERSION(BATADV_SOURCE_VERSION);
+MODULE_ALIAS_RTNL_LINK("batadv");
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 06a860845434..09af21e27639 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.3"
+#define BATADV_SOURCE_VERSION "2016.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cc915073a753..13661f43386f 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -528,7 +528,7 @@ update:
}
return !(mcast_data.flags &
- (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
+ (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
}
/**
@@ -1134,6 +1134,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_mcast_flags_print_header - print own mcast flags to debugfs table
* @bat_priv: the bat priv with all the soft interface information
@@ -1234,6 +1235,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
return 0;
}
+#endif
/**
* batadv_mcast_free - free the multicast optimizations structures
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 231f8eaf075b..18831e72b0fb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -18,6 +18,8 @@
#include "netlink.h"
#include "main.h"
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/genetlink.h>
@@ -26,24 +28,33 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <net/genetlink.h>
#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batman_adv.h>
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
#include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
#include "soft-interface.h"
#include "tp_meter.h"
+#include "translation-table.h"
-struct sk_buff;
-
-static struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = BATADV_NL_NAME,
.version = 1,
.maxattr = BATADV_ATTR_MAX,
+ .netnsok = true,
};
/* multicast groups */
@@ -69,9 +80,44 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
[BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
[BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
[BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
};
/**
+ * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * @nlh: Message header
+ * @attrtype: Attribute which holds an interface index
+ *
+ * Return: interface index, or 0.
+ */
+int
+batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
+{
+ struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
+
+ return attr ? nla_get_u32(attr) : 0;
+}
+
+/**
* batadv_netlink_mesh_info_put - fill in generic information about mesh
* interface
* @msg: netlink message to be sent back
@@ -93,8 +139,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
- soft_iface->dev_addr))
+ soft_iface->dev_addr) ||
+ nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
+ (u8)atomic_read(&bat_priv->tt.vn)))
+ goto out;
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+ if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+ ntohs(bat_priv->bla.claim_dest.group)))
goto out;
+#endif
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -380,6 +434,106 @@ out:
return ret;
}
+/**
+ * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hard_iface: Hard interface to dump
+ *
+ * Return: error code, or 0 on success
+ */
+static int
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_hard_iface *hard_iface)
+{
+ struct net_device *net_dev = hard_iface->net_dev;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_HARDIFS);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ net_dev->ifindex) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ net_dev->name) ||
+ nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+ net_dev->dev_addr))
+ goto nla_put_failure;
+
+ if (hard_iface->if_status == BATADV_IF_ACTIVE) {
+ if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_dump_hardifs - Dump all hard interface into a messages
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: error code, or length of reply message on success
+ */
+static int
+batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hard_iface *hard_iface;
+ int ifindex;
+ int portid = NETLINK_CB(cb->skb).portid;
+ int seq = cb->nlh->nlmsg_seq;
+ int skip = cb->args[0];
+ int i = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface)
+ return -ENODEV;
+
+ if (!batadv_softif_is_valid(soft_iface)) {
+ dev_put(soft_iface);
+ return -ENODEV;
+ }
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface != soft_iface)
+ continue;
+
+ if (i++ < skip)
+ continue;
+
+ if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+ hard_iface)) {
+ i--;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ dev_put(soft_iface);
+
+ cb->args[0] = i;
+
+ return msg->len;
+}
+
static struct genl_ops batadv_netlink_ops[] = {
{
.cmd = BATADV_CMD_GET_MESH_INFO,
@@ -399,6 +553,61 @@ static struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.doit = batadv_netlink_tp_meter_cancel,
},
+ {
+ .cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_algo_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_HARDIFS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_netlink_dump_hardifs,
+ },
+ {
+ .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_tt_local_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_tt_global_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_ORIGINATORS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_orig_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_NEIGHBORS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_hardif_neigh_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_GATEWAYS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_gw_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_BLA_CLAIM,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_bla_claim_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_BLA_BACKBONE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_bla_backbone_dump,
+ },
+
};
/**
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 945653ab58c6..52eb16281aba 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,18 @@
#include "main.h"
#include <linux/types.h>
+#include <net/genetlink.h>
+
+struct nlmsghdr;
void batadv_netlink_register(void);
void batadv_netlink_unregister(void);
+int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie);
+extern struct genl_family batadv_netlink_family;
+
#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 293ef4ffd4e1..e3baf697a35c 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -856,14 +856,12 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
if (!nc_node)
return NULL;
- kref_get(&orig_neigh_node->refcount);
-
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
+ kref_init(&nc_node->refcount);
ether_addr_copy(nc_node->addr, orig_node->orig);
+ kref_get(&orig_neigh_node->refcount);
nc_node->orig_node = orig_neigh_node;
- kref_init(&nc_node->refcount);
- kref_get(&nc_node->refcount);
/* Select ingoing or outgoing coding node */
if (in_coding) {
@@ -879,6 +877,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
/* Add nc_node to orig_node */
spin_lock_bh(lock);
+ kref_get(&nc_node->refcount);
list_add_tail_rcu(&nc_node->list, list);
spin_unlock_bh(lock);
@@ -979,7 +978,6 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
INIT_LIST_HEAD(&nc_path->packet_list);
spin_lock_init(&nc_path->packet_list_lock);
kref_init(&nc_path->refcount);
- kref_get(&nc_path->refcount);
nc_path->last_valid = jiffies;
ether_addr_copy(nc_path->next_hop, dst);
ether_addr_copy(nc_path->prev_hop, src);
@@ -989,6 +987,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
nc_path->next_hop);
/* Add nc_path to hash table */
+ kref_get(&nc_path->refcount);
hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
batadv_nc_hash_choose, &nc_path_key,
&nc_path->hash_entry);
@@ -1882,6 +1881,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(bat_priv->nc.decoding_hash);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_nc_nodes_seq_print_text - print the nc node information
* @seq: seq file to print on
@@ -1981,3 +1981,4 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
out:
return -ENOMEM;
}
+#endif
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3940b5d24421..5f3bfc41aeb1 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,11 +28,15 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
#include "distributed-arp-table.h"
@@ -42,8 +46,10 @@
#include "hash.h"
#include "log.h"
#include "multicast.h"
+#include "netlink.h"
#include "network-coding.h"
#include "routing.h"
+#include "soft-interface.h"
#include "translation-table.h"
/* hash class keys */
@@ -127,9 +133,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
goto out;
kref_init(&vlan->refcount);
- kref_get(&vlan->refcount);
vlan->vid = vid;
+ kref_get(&vlan->refcount);
hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
out:
@@ -380,6 +386,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
orig_ifinfo->if_outgoing = if_outgoing;
INIT_HLIST_NODE(&orig_ifinfo->list);
kref_init(&orig_ifinfo->refcount);
+
kref_get(&orig_ifinfo->refcount);
hlist_add_head_rcu(&orig_ifinfo->list,
&orig_node->ifinfo_list);
@@ -453,9 +460,9 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
INIT_HLIST_NODE(&neigh_ifinfo->list);
kref_init(&neigh_ifinfo->refcount);
- kref_get(&neigh_ifinfo->refcount);
neigh_ifinfo->if_outgoing = if_outgoing;
+ kref_get(&neigh_ifinfo->refcount);
hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
out:
@@ -647,8 +654,8 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
/* extra reference for return */
kref_init(&neigh_node->refcount);
- kref_get(&neigh_node->refcount);
+ kref_get(&neigh_node->refcount);
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
@@ -686,6 +693,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
* @seq: neighbour table seq_file struct
@@ -719,6 +727,84 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
bat_priv->algo_ops->neigh.print(bat_priv, seq);
return 0;
}
+#endif
+
+/**
+ * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
+ * outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct net_device *hard_iface = NULL;
+ struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+ struct batadv_priv *bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ int ret;
+ int ifindex, hard_ifindex;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_HARD_IFINDEX);
+ if (hard_ifindex) {
+ hard_iface = dev_get_by_index(net, hard_ifindex);
+ if (hard_iface)
+ hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+ if (!hardif) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (hardif->soft_iface != soft_iface) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (!bat_priv->algo_ops->neigh.dump) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif);
+
+ ret = msg->len;
+
+ out:
+ if (hardif)
+ batadv_hardif_put(hardif);
+ if (hard_iface)
+ dev_put(hard_iface);
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
/**
* batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
@@ -905,7 +991,6 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
/* extra reference for return */
kref_init(&orig_node->refcount);
- kref_get(&orig_node->refcount);
orig_node->bat_priv = bat_priv;
ether_addr_copy(orig_node->orig, addr);
@@ -1256,6 +1341,7 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
_batadv_purge_orig(bat_priv);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1329,6 +1415,84 @@ out:
batadv_hardif_put(hard_iface);
return 0;
}
+#endif
+
+/**
+ * batadv_orig_dump - Dump to netlink the originator infos for a specific
+ * outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct net_device *hard_iface = NULL;
+ struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+ struct batadv_priv *bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ int ret;
+ int ifindex, hard_ifindex;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_HARD_IFINDEX);
+ if (hard_ifindex) {
+ hard_iface = dev_get_by_index(net, hard_ifindex);
+ if (hard_iface)
+ hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+ if (!hardif) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (hardif->soft_iface != soft_iface) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (!bat_priv->algo_ops->orig.dump) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif);
+
+ ret = msg->len;
+
+ out:
+ if (hardif)
+ batadv_hardif_put(hardif);
+ if (hard_iface)
+ dev_put(hard_iface);
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 566306bf05dc..ebc56183f358 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -31,7 +31,9 @@
#include "hash.h"
+struct netlink_callback;
struct seq_file;
+struct sk_buff;
bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
@@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
struct batadv_orig_ifinfo *
@@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6b011ff64dd8..6afc0b86950e 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -129,42 +129,6 @@ enum batadv_tt_data_flags {
};
/**
- * enum batadv_tt_client_flags - TT client specific flags
- * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
- * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
- * update telling its new real location has not been received/sent yet
- * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
- * This information is used by the "AP Isolation" feature
- * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
- * information is used by the Extended Isolation feature
- * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
- * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
- * not been announced yet
- * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
- * in the table for one more originator interval for consistency purposes
- * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
- * the network but no nnode has already announced it
- *
- * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
- * Bits from 8 to 15 are called _local flags_ because they are used for local
- * computations only.
- *
- * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
- * the other nodes in the network. To achieve this goal these flags are included
- * in the TT CRC computation.
- */
-enum batadv_tt_client_flags {
- BATADV_TT_CLIENT_DEL = BIT(0),
- BATADV_TT_CLIENT_ROAM = BIT(1),
- BATADV_TT_CLIENT_WIFI = BIT(4),
- BATADV_TT_CLIENT_ISOLA = BIT(5),
- BATADV_TT_CLIENT_NOPURGE = BIT(8),
- BATADV_TT_CLIENT_NEW = BIT(9),
- BATADV_TT_CLIENT_PENDING = BIT(10),
- BATADV_TT_CLIENT_TEMP = BIT(11),
-};
-
-/**
* enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
* @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
*/
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7602c001e92b..610f2c45edcd 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
if (!orig_ifinfo)
return;
- rcu_read_lock();
- curr_router = rcu_dereference(orig_ifinfo->router);
- if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
- curr_router = NULL;
- rcu_read_unlock();
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replace the current
+ * best neighbor the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
+ /* increase refcount of new best neighbor */
+ if (neigh_node)
+ kref_get(&neigh_node->refcount);
+
+ rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+ batadv_orig_ifinfo_put(orig_ifinfo);
/* route deleted */
if ((curr_router) && (!neigh_node)) {
@@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
curr_router->addr);
}
- if (curr_router)
- batadv_neigh_node_put(curr_router);
-
- spin_lock_bh(&orig_node->neigh_list_lock);
- /* curr_router used earlier may not be the current orig_ifinfo->router
- * anymore because it was dereferenced outside of the neigh_list_lock
- * protected region. After the new best neighbor has replace the current
- * best neighbor the reference counter needs to decrease. Consequently,
- * the code needs to ensure the curr_router variable contains a pointer
- * to the replaced best neighbor.
- */
- curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
-
- /* increase refcount of new best neighbor */
- if (neigh_node)
- kref_get(&neigh_node->refcount);
-
- rcu_assign_pointer(orig_ifinfo->router, neigh_node);
- spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_orig_ifinfo_put(orig_ifinfo);
-
/* decrease refcount of previous best neighbor */
if (curr_router)
batadv_neigh_node_put(curr_router);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 6191159484df..8d4e1f578574 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -315,8 +315,7 @@ out:
*
* Wrap the given skb into a batman-adv unicast or unicast-4addr header
* depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied
- * as packet_type. Then send this frame to the given orig_node and release a
- * reference to this orig_node.
+ * as packet_type. Then send this frame to the given orig_node.
*
* Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
@@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
ret = NET_XMIT_SUCCESS;
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
if (ret == NET_XMIT_DROP)
kfree_skb(skb);
return ret;
@@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
struct batadv_orig_node *orig_node;
u8 *src, *dst;
+ int ret;
src = ethhdr->h_source;
dst = ethhdr->h_dest;
@@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
}
orig_node = batadv_transtable_search(bat_priv, src, dst, vid);
- return batadv_send_skb_unicast(bat_priv, skb, packet_type,
- packet_subtype, orig_node, vid);
+ ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
+ packet_subtype, orig_node, vid);
+
+ if (orig_node)
+ batadv_orig_node_put(orig_node);
+
+ return ret;
}
/**
@@ -433,12 +436,25 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid)
{
struct batadv_orig_node *orig_node;
+ int ret;
orig_node = batadv_gw_get_selected_orig(bat_priv);
- return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
- BATADV_P_DATA, orig_node, vid);
+ ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+ BATADV_P_DATA, orig_node, vid);
+
+ if (orig_node)
+ batadv_orig_node_put(orig_node);
+
+ return ret;
}
+/**
+ * batadv_forw_packet_free - free a forwarding packet
+ * @forw_packet: The packet to free
+ *
+ * This frees a forwarding packet and releases any resources it might
+ * have claimed.
+ */
void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
{
kfree_skb(forw_packet->skb);
@@ -446,9 +462,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
batadv_hardif_put(forw_packet->if_incoming);
if (forw_packet->if_outgoing)
batadv_hardif_put(forw_packet->if_outgoing);
+ if (forw_packet->queue_left)
+ atomic_inc(forw_packet->queue_left);
kfree(forw_packet);
}
+/**
+ * batadv_forw_packet_alloc - allocate a forwarding packet
+ * @if_incoming: The (optional) if_incoming to be grabbed
+ * @if_outgoing: The (optional) if_outgoing to be grabbed
+ * @queue_left: The (optional) queue counter to decrease
+ * @bat_priv: The bat_priv for the mesh of this forw_packet
+ *
+ * Allocates a forwarding packet and tries to get a reference to the
+ * (optional) if_incoming, if_outgoing and queue_left. If queue_left
+ * is NULL then bat_priv is optional, too.
+ *
+ * Return: An allocated forwarding packet on success, NULL otherwise.
+ */
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+ struct batadv_hard_iface *if_outgoing,
+ atomic_t *queue_left,
+ struct batadv_priv *bat_priv)
+{
+ struct batadv_forw_packet *forw_packet;
+ const char *qname;
+
+ if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) {
+ qname = "unknown";
+
+ if (queue_left == &bat_priv->bcast_queue_left)
+ qname = "bcast";
+
+ if (queue_left == &bat_priv->batman_queue_left)
+ qname = "batman";
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "%s queue is full\n", qname);
+
+ return NULL;
+ }
+
+ forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+ if (!forw_packet)
+ goto err;
+
+ if (if_incoming)
+ kref_get(&if_incoming->refcount);
+
+ if (if_outgoing)
+ kref_get(&if_outgoing->refcount);
+
+ forw_packet->skb = NULL;
+ forw_packet->queue_left = queue_left;
+ forw_packet->if_incoming = if_incoming;
+ forw_packet->if_outgoing = if_outgoing;
+ forw_packet->num_packets = 0;
+
+ return forw_packet;
+
+err:
+ if (queue_left)
+ atomic_inc(queue_left);
+
+ return NULL;
+}
+
static void
_batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet,
@@ -487,24 +567,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
struct batadv_bcast_packet *bcast_packet;
struct sk_buff *newskb;
- if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) {
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "bcast packet queue full\n");
- goto out;
- }
-
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
- goto out_and_inc;
-
- forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+ goto err;
+ forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+ &bat_priv->bcast_queue_left,
+ bat_priv);
+ batadv_hardif_put(primary_if);
if (!forw_packet)
- goto out_and_inc;
+ goto err;
newskb = skb_copy(skb, GFP_ATOMIC);
if (!newskb)
- goto packet_free;
+ goto err_packet_free;
/* as we have a copy now, it is safe to decrease the TTL */
bcast_packet = (struct batadv_bcast_packet *)newskb->data;
@@ -513,11 +589,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
skb_reset_mac_header(newskb);
forw_packet->skb = newskb;
- forw_packet->if_incoming = primary_if;
- forw_packet->if_outgoing = NULL;
-
- /* how often did we send the bcast packet ? */
- forw_packet->num_packets = 0;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
batadv_send_outstanding_bcast_packet);
@@ -525,13 +596,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
_batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
return NETDEV_TX_OK;
-packet_free:
- kfree(forw_packet);
-out_and_inc:
- atomic_inc(&bat_priv->bcast_queue_left);
-out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+err_packet_free:
+ batadv_forw_packet_free(forw_packet);
+err:
return NETDEV_TX_BUSY;
}
@@ -592,7 +659,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
out:
batadv_forw_packet_free(forw_packet);
- atomic_inc(&bat_priv->bcast_queue_left);
}
void
@@ -633,9 +699,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
- if (!forw_packet->own)
- atomic_inc(&bat_priv->bcast_queue_left);
-
batadv_forw_packet_free(forw_packet);
}
}
@@ -663,9 +726,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
- if (!forw_packet->own)
- atomic_inc(&bat_priv->batman_queue_left);
-
batadv_forw_packet_free(forw_packet);
}
}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 7cecb7563b45..999f78683d9e 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -28,6 +28,12 @@
struct sk_buff;
void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+ struct batadv_hard_iface *if_outgoing,
+ atomic_t *queue_left,
+ struct batadv_priv *bat_priv);
+
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7527c0652dd5..49e16b6e0ba3 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -39,6 +39,7 @@
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/socket.h>
@@ -46,7 +47,6 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
-#include <linux/workqueue.h>
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
@@ -57,6 +57,7 @@
#include "hard-interface.h"
#include "multicast.h"
#include "network-coding.h"
+#include "originator.h"
#include "packet.h"
#include "send.h"
#include "sysfs.h"
@@ -377,6 +378,8 @@ dropped:
dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
+ if (mcast_single_orig)
+ batadv_orig_node_put(mcast_single_orig);
if (primary_if)
batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
@@ -591,6 +594,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
}
spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ kref_get(&vlan->refcount);
hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
@@ -601,6 +605,9 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
bat_priv->soft_iface->dev_addr, vid,
BATADV_NULL_IFINDEX, BATADV_NO_MARK);
+ /* don't return reference to new softif_vlan */
+ batadv_softif_vlan_put(vlan);
+
return 0;
}
@@ -747,34 +754,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
}
/**
- * batadv_softif_destroy_finish - cleans up the remains of a softif
- * @work: work queue item
- *
- * Free the parts of the soft interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_softif_destroy_finish(struct work_struct *work)
-{
- struct batadv_softif_vlan *vlan;
- struct batadv_priv *bat_priv;
- struct net_device *soft_iface;
-
- bat_priv = container_of(work, struct batadv_priv,
- cleanup_work);
- soft_iface = bat_priv->soft_iface;
-
- /* destroy the "untagged" VLAN */
- vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
- if (vlan) {
- batadv_softif_destroy_vlan(bat_priv, vlan);
- batadv_softif_vlan_put(vlan);
- }
-
- batadv_sysfs_del_meshif(soft_iface);
- unregister_netdev(soft_iface);
-}
-
-/**
* batadv_softif_init_late - late stage initialization of soft interface
* @dev: registered network device to modify
*
@@ -791,7 +770,6 @@ static int batadv_softif_init_late(struct net_device *dev)
bat_priv = netdev_priv(dev);
bat_priv->soft_iface = dev;
- INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
/* batadv_interface_stats() needs to be available as soon as
* register_netdevice() has been called
@@ -1028,8 +1006,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ struct batadv_softif_vlan *vlan;
- queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
+ ASSERT_RTNL();
+
+ /* destroy the "untagged" VLAN */
+ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+ if (vlan) {
+ batadv_softif_destroy_vlan(bat_priv, vlan);
+ batadv_softif_vlan_put(vlan);
+ }
+
+ batadv_sysfs_del_meshif(soft_iface);
+ unregister_netdevice(soft_iface);
}
/**
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe9ca94ddee2..02d96f224c60 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -37,6 +37,7 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/stringify.h>
+#include <linux/workqueue.h>
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
@@ -428,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr,
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
int bytes_written;
+ /* GW mode is not available if the routing algorithm in use does not
+ * implement the GW API
+ */
+ if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+ !bat_priv->algo_ops->gw.is_eligible)
+ return -ENOENT;
+
switch (atomic_read(&bat_priv->gw.mode)) {
case BATADV_GW_MODE_CLIENT:
bytes_written = sprintf(buff, "%s\n",
@@ -455,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
char *curr_gw_mode_str;
int gw_mode_tmp = -1;
+ /* toggling GW mode is allowed only if the routing algorithm in use
+ * provides the GW API
+ */
+ if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+ !bat_priv->algo_ops->gw.is_eligible)
+ return -EINVAL;
+
if (buff[count - 1] == '\n')
buff[count - 1] = '\0';
@@ -514,6 +529,50 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
return count;
}
+static ssize_t batadv_show_gw_sel_class(struct kobject *kobj,
+ struct attribute *attr, char *buff)
+{
+ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+ /* GW selection class is not available if the routing algorithm in use
+ * does not implement the GW API
+ */
+ if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+ !bat_priv->algo_ops->gw.is_eligible)
+ return -ENOENT;
+
+ if (bat_priv->algo_ops->gw.show_sel_class)
+ return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff);
+
+ return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class));
+}
+
+static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
+ struct attribute *attr, char *buff,
+ size_t count)
+{
+ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+ /* setting the GW selection class is allowed only if the routing
+ * algorithm in use implements the GW API
+ */
+ if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+ !bat_priv->algo_ops->gw.is_eligible)
+ return -EINVAL;
+
+ if (buff[count - 1] == '\n')
+ buff[count - 1] = '\0';
+
+ if (bat_priv->algo_ops->gw.store_sel_class)
+ return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
+ count);
+
+ return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+ batadv_post_gw_reselect, attr,
+ &bat_priv->gw.sel_class,
+ bat_priv->soft_iface);
+}
+
static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
struct attribute *attr, char *buff)
{
@@ -625,8 +684,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
2 * BATADV_JITTER, INT_MAX, NULL);
BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
- BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
+static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+ batadv_store_gw_sel_class);
static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
batadv_store_gw_bwidth);
#ifdef CONFIG_BATMAN_ADV_MCAST
@@ -712,6 +771,8 @@ rem_attr:
for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+ kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+ kobject_del(bat_priv->mesh_obj);
kobject_put(bat_priv->mesh_obj);
bat_priv->mesh_obj = NULL;
out:
@@ -726,6 +787,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+ kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+ kobject_del(bat_priv->mesh_obj);
kobject_put(bat_priv->mesh_obj);
bat_priv->mesh_obj = NULL;
}
@@ -781,6 +844,10 @@ rem_attr:
for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+ if (vlan->kobj != bat_priv->mesh_obj) {
+ kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+ kobject_del(vlan->kobj);
+ }
kobject_put(vlan->kobj);
vlan->kobj = NULL;
out:
@@ -800,6 +867,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+ if (vlan->kobj != bat_priv->mesh_obj) {
+ kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+ kobject_del(vlan->kobj);
+ }
kobject_put(vlan->kobj);
vlan->kobj = NULL;
}
@@ -828,31 +899,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
return length;
}
-static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
- struct attribute *attr, char *buff,
- size_t count)
+/**
+ * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @ifname: name of soft-interface to modify
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ *
+ * Return: 0 on success, 0 < on failure
+ */
+static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
+ char ifname[IFNAMSIZ])
{
- struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
struct net *net = dev_net(net_dev);
struct batadv_hard_iface *hard_iface;
- int status_tmp = -1;
- int ret = count;
+ int status_tmp;
+ int ret = 0;
+
+ ASSERT_RTNL();
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface)
- return count;
-
- if (buff[count - 1] == '\n')
- buff[count - 1] = '\0';
-
- if (strlen(buff) >= IFNAMSIZ) {
- pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
- buff);
- batadv_hardif_put(hard_iface);
- return -EINVAL;
- }
+ return 0;
- if (strncmp(buff, "none", 4) == 0)
+ if (strncmp(ifname, "none", 4) == 0)
status_tmp = BATADV_IF_NOT_IN_USE;
else
status_tmp = BATADV_IF_I_WANT_YOU;
@@ -861,15 +932,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
goto out;
if ((hard_iface->soft_iface) &&
- (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+ (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
goto out;
- rtnl_lock();
-
if (status_tmp == BATADV_IF_NOT_IN_USE) {
batadv_hardif_disable_interface(hard_iface,
BATADV_IF_CLEANUP_AUTO);
- goto unlock;
+ goto out;
}
/* if the interface already is in use */
@@ -877,15 +946,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
batadv_hardif_disable_interface(hard_iface,
BATADV_IF_CLEANUP_AUTO);
- ret = batadv_hardif_enable_interface(hard_iface, net, buff);
-
-unlock:
- rtnl_unlock();
+ ret = batadv_hardif_enable_interface(hard_iface, net, ifname);
out:
batadv_hardif_put(hard_iface);
return ret;
}
+/**
+ * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * @work: work queue item
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ */
+static void batadv_store_mesh_iface_work(struct work_struct *work)
+{
+ struct batadv_store_mesh_work *store_work;
+ int ret;
+
+ store_work = container_of(work, struct batadv_store_mesh_work, work);
+
+ rtnl_lock();
+ ret = batadv_store_mesh_iface_finish(store_work->net_dev,
+ store_work->soft_iface_name);
+ rtnl_unlock();
+
+ if (ret < 0)
+ pr_err("Failed to store new mesh_iface state %s for %s: %d\n",
+ store_work->soft_iface_name, store_work->net_dev->name,
+ ret);
+
+ dev_put(store_work->net_dev);
+ kfree(store_work);
+}
+
+static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
+ struct attribute *attr, char *buff,
+ size_t count)
+{
+ struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+ struct batadv_store_mesh_work *store_work;
+
+ if (buff[count - 1] == '\n')
+ buff[count - 1] = '\0';
+
+ if (strlen(buff) >= IFNAMSIZ) {
+ pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
+ buff);
+ return -EINVAL;
+ }
+
+ store_work = kmalloc(sizeof(*store_work), GFP_KERNEL);
+ if (!store_work)
+ return -ENOMEM;
+
+ dev_hold(net_dev);
+ INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
+ store_work->net_dev = net_dev;
+ strlcpy(store_work->soft_iface_name, buff,
+ sizeof(store_work->soft_iface_name));
+
+ queue_work(batadv_event_workqueue, &store_work->work);
+
+ return count;
+}
+
static ssize_t batadv_show_iface_status(struct kobject *kobj,
struct attribute *attr, char *buff)
{
@@ -1048,6 +1173,8 @@ out:
void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
{
+ kobject_uevent(*hardif_obj, KOBJ_REMOVE);
+ kobject_del(*hardif_obj);
kobject_put(*hardif_obj);
*hardif_obj = NULL;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7e6df7a4964a..7f663092f6de 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -22,12 +22,14 @@
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/if_ether.h>
+#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -35,25 +37,39 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
#include "multicast.h"
+#include "netlink.h"
#include "originator.h"
#include "packet.h"
#include "soft-interface.h"
#include "tvlv.h"
+static struct kmem_cache *batadv_tl_cache __read_mostly;
+static struct kmem_cache *batadv_tg_cache __read_mostly;
+static struct kmem_cache *batadv_tt_orig_cache __read_mostly;
+static struct kmem_cache *batadv_tt_change_cache __read_mostly;
+static struct kmem_cache *batadv_tt_req_cache __read_mostly;
+static struct kmem_cache *batadv_tt_roam_cache __read_mostly;
+
/* hash class keys */
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
static struct lock_class_key batadv_tt_global_hash_lock_class_key;
@@ -205,6 +221,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
+ * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * @rcu: rcu pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_local_entry *tt_local_entry;
+
+ tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
+ common.rcu);
+
+ kmem_cache_free(batadv_tl_cache, tt_local_entry);
+}
+
+/**
* batadv_tt_local_entry_release - release tt_local_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -218,7 +248,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
batadv_softif_vlan_put(tt_local_entry->vlan);
- kfree_rcu(tt_local_entry, common.rcu);
+ call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
}
/**
@@ -234,6 +264,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
}
/**
+ * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * @rcu: rcu pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_global_entry *tt_global_entry;
+
+ tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
+ common.rcu);
+
+ kmem_cache_free(batadv_tg_cache, tt_global_entry);
+}
+
+/**
* batadv_tt_global_entry_release - release tt_global_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -246,7 +290,8 @@ static void batadv_tt_global_entry_release(struct kref *ref)
common.refcount);
batadv_tt_global_del_orig_list(tt_global_entry);
- kfree_rcu(tt_global_entry, common.rcu);
+
+ call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
}
/**
@@ -384,6 +429,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * @rcu: rcu pointer of the orig_entry
+ */
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+
+ kmem_cache_free(batadv_tt_orig_cache, orig_entry);
+}
+
+/**
* batadv_tt_orig_list_entry_release - release tt orig entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the tt orig entry
@@ -396,7 +454,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
refcount);
batadv_orig_node_put(orig_entry->orig_node);
- kfree_rcu(orig_entry, rcu);
+ call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
}
/**
@@ -426,7 +484,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
bool event_removed = false;
bool del_op_requested, del_op_entry;
- tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC);
+ tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC);
if (!tt_change_node)
return;
@@ -467,8 +525,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
continue;
del:
list_del(&entry->list);
- kfree(entry);
- kfree(tt_change_node);
+ kmem_cache_free(batadv_tt_change_cache, entry);
+ kmem_cache_free(batadv_tt_change_cache, tt_change_node);
event_removed = true;
goto unlock;
}
@@ -646,7 +704,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
goto out;
}
- tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
+ tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC);
if (!tt_local)
goto out;
@@ -656,7 +714,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
net_ratelimited_function(batadv_info, soft_iface,
"adding TT local entry %pM to non-existent VLAN %d\n",
addr, BATADV_PRINT_VID(vid));
- kfree(tt_local);
+ kmem_cache_free(batadv_tl_cache, tt_local);
tt_local = NULL;
goto out;
}
@@ -676,7 +734,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (batadv_is_wifi_netdev(in_dev))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
kref_init(&tt_local->common.refcount);
- kref_get(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
tt_local->vlan = vlan;
@@ -688,6 +745,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
is_multicast_ether_addr(addr))
tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
+ kref_get(&tt_local->common.refcount);
hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
batadv_choose_tt, &tt_local->common,
&tt_local->common.hash_entry);
@@ -959,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
tt_diff_entries_count++;
}
list_del(&entry->list);
- kfree(entry);
+ kmem_cache_free(batadv_tt_change_cache, entry);
}
spin_unlock_bh(&bat_priv->tt.changes_list_lock);
@@ -989,6 +1047,7 @@ container_register:
kfree(tt_data);
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1056,6 +1115,165 @@ out:
batadv_hardif_put(primary_if);
return 0;
}
+#endif
+
+/**
+ * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * @msg :Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_tt_common_entry *common)
+{
+ void *hdr;
+ struct batadv_softif_vlan *vlan;
+ struct batadv_tt_local_entry *local;
+ unsigned int last_seen_msecs;
+ u32 crc;
+
+ local = container_of(common, struct batadv_tt_local_entry, common);
+ last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen);
+
+ vlan = batadv_softif_vlan_get(bat_priv, common->vid);
+ if (!vlan)
+ return 0;
+
+ crc = vlan->tt.crc;
+
+ batadv_softif_vlan_put(vlan);
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI,
+ BATADV_CMD_GET_TRANSTABLE_LOCAL);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+ nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+ nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+ goto nla_put_failure;
+
+ if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) &&
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the local tt entries
+ * @idx_s: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct hlist_head *head, int *idx_s)
+{
+ struct batadv_tt_common_entry *common;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(common, head, hash_entry) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+ common)) {
+ rcu_read_unlock();
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+ rcu_read_unlock();
+
+ *idx_s = 0;
+ return 0;
+}
+
+/**
+ * batadv_tt_local_dump - Dump TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or 0 on success
+ */
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_hashtable *hash;
+ struct hlist_head *head;
+ int ret;
+ int ifindex;
+ int bucket = cb->args[0];
+ int idx = cb->args[1];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ hash = bat_priv->tt.local_hash;
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, head, &idx))
+ break;
+
+ bucket++;
+ }
+
+ ret = msg->len;
+
+ out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ return ret;
+}
static void
batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
@@ -1259,7 +1477,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
list_del(&entry->list);
- kfree(entry);
+ kmem_cache_free(batadv_tt_change_cache, entry);
}
atomic_set(&bat_priv->tt.local_changes, 0);
@@ -1341,7 +1559,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
goto out;
}
- orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
+ orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
if (!orig_entry)
goto out;
@@ -1351,9 +1569,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
kref_init(&orig_entry->refcount);
- kref_get(&orig_entry->refcount);
spin_lock_bh(&tt_global->list_lock);
+ kref_get(&orig_entry->refcount);
hlist_add_head_rcu(&orig_entry->list,
&tt_global->orig_list);
spin_unlock_bh(&tt_global->list_lock);
@@ -1411,7 +1629,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
goto out;
if (!tt_global_entry) {
- tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC);
+ tt_global_entry = kmem_cache_zalloc(batadv_tg_cache,
+ GFP_ATOMIC);
if (!tt_global_entry)
goto out;
@@ -1428,13 +1647,13 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
if (flags & BATADV_TT_CLIENT_ROAM)
tt_global_entry->roam_at = jiffies;
kref_init(&common->refcount);
- kref_get(&common->refcount);
common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list);
atomic_set(&tt_global_entry->orig_list_count, 0);
spin_lock_init(&tt_global_entry->list_lock);
+ kref_get(&common->refcount);
hash_added = batadv_hash_add(bat_priv->tt.global_hash,
batadv_compare_tt,
batadv_choose_tt, common,
@@ -1579,6 +1798,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
return best_entry;
}
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/**
* batadv_tt_global_print_entry - print all orig nodes who announce the address
* for this global entry
@@ -1702,6 +1922,219 @@ out:
batadv_hardif_put(primary_if);
return 0;
}
+#endif
+
+/**
+ * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @common: tt local & tt global common data
+ * @orig: Originator node announcing a non-mesh client
+ * @best: Is the best originator for the TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_tt_common_entry *common,
+ struct batadv_tt_orig_list_entry *orig,
+ bool best)
+{
+ void *hdr;
+ struct batadv_orig_node_vlan *vlan;
+ u8 last_ttvn;
+ u32 crc;
+
+ vlan = batadv_orig_node_vlan_get(orig->orig_node,
+ common->vid);
+ if (!vlan)
+ return 0;
+
+ crc = vlan->tt.crc;
+
+ batadv_orig_node_vlan_put(vlan);
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI,
+ BATADV_CMD_GET_TRANSTABLE_GLOBAL);
+ if (!hdr)
+ return -ENOBUFS;
+
+ last_ttvn = atomic_read(&orig->orig_node->last_ttvn);
+
+ if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+ nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig->orig_node->orig) ||
+ nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) ||
+ nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
+ nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+ nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+ goto nla_put_failure;
+
+ if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ * @sub_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct batadv_tt_common_entry *common, int *sub_s)
+{
+ struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+ struct batadv_tt_global_entry *global;
+ struct hlist_head *head;
+ int sub = 0;
+ bool best;
+
+ global = container_of(common, struct batadv_tt_global_entry, common);
+ best_entry = batadv_transtable_best_orig(bat_priv, global);
+ head = &global->orig_list;
+
+ hlist_for_each_entry_rcu(orig_entry, head, list) {
+ if (sub++ < *sub_s)
+ continue;
+
+ best = (orig_entry == best_entry);
+
+ if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+ orig_entry, best)) {
+ *sub_s = sub - 1;
+ return -EMSGSIZE;
+ }
+ }
+
+ *sub_s = 0;
+ return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv,
+ struct hlist_head *head, int *idx_s, int *sub)
+{
+ struct batadv_tt_common_entry *common;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(common, head, hash_entry) {
+ if (idx++ < *idx_s)
+ continue;
+
+ if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+ common, sub)) {
+ rcu_read_unlock();
+ *idx_s = idx - 1;
+ return -EMSGSIZE;
+ }
+ }
+ rcu_read_unlock();
+
+ *idx_s = 0;
+ *sub = 0;
+ return 0;
+}
+
+/**
+ * batadv_tt_global_dump - Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_hashtable *hash;
+ struct hlist_head *head;
+ int ret;
+ int ifindex;
+ int bucket = cb->args[0];
+ int idx = cb->args[1];
+ int sub = cb->args[2];
+ int portid = NETLINK_CB(cb->skb).portid;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ hash = bat_priv->tt.global_hash;
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_tt_global_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, bat_priv,
+ head, &idx, &sub))
+ break;
+
+ bucket++;
+ }
+
+ ret = msg->len;
+
+ out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+ cb->args[2] = sub;
+
+ return ret;
+}
/**
* _batadv_tt_global_del_orig_entry - remove and free an orig_entry
@@ -2280,7 +2713,7 @@ static void batadv_tt_req_node_release(struct kref *ref)
tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
- kfree(tt_req_node);
+ kmem_cache_free(batadv_tt_req_cache, tt_req_node);
}
/**
@@ -2367,7 +2800,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv,
goto unlock;
}
- tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC);
+ tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC);
if (!tt_req_node)
goto unlock;
@@ -3104,7 +3537,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
list_del(&node->list);
- kfree(node);
+ kmem_cache_free(batadv_tt_roam_cache, node);
}
spin_unlock_bh(&bat_priv->tt.roam_list_lock);
@@ -3121,7 +3554,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
continue;
list_del(&node->list);
- kfree(node);
+ kmem_cache_free(batadv_tt_roam_cache, node);
}
spin_unlock_bh(&bat_priv->tt.roam_list_lock);
}
@@ -3162,7 +3595,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
}
if (!ret) {
- tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC);
+ tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache,
+ GFP_ATOMIC);
if (!tt_roam_node)
goto unlock;
@@ -3865,3 +4299,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
return ret;
}
+
+/**
+ * batadv_tt_cache_init - Initialize tt memory object cache
+ *
+ * Return: 0 on success or negative error number in case of failure.
+ */
+int __init batadv_tt_cache_init(void)
+{
+ size_t tl_size = sizeof(struct batadv_tt_local_entry);
+ size_t tg_size = sizeof(struct batadv_tt_global_entry);
+ size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry);
+ size_t tt_change_size = sizeof(struct batadv_tt_change_node);
+ size_t tt_req_size = sizeof(struct batadv_tt_req_node);
+ size_t tt_roam_size = sizeof(struct batadv_tt_roam_node);
+
+ batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tl_cache)
+ return -ENOMEM;
+
+ batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tg_cache)
+ goto err_tt_tl_destroy;
+
+ batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache",
+ tt_orig_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tt_orig_cache)
+ goto err_tt_tg_destroy;
+
+ batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache",
+ tt_change_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tt_change_cache)
+ goto err_tt_orig_destroy;
+
+ batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache",
+ tt_req_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tt_req_cache)
+ goto err_tt_change_destroy;
+
+ batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache",
+ tt_roam_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!batadv_tt_roam_cache)
+ goto err_tt_req_destroy;
+
+ return 0;
+
+err_tt_req_destroy:
+ kmem_cache_destroy(batadv_tt_req_cache);
+ batadv_tt_req_cache = NULL;
+err_tt_change_destroy:
+ kmem_cache_destroy(batadv_tt_change_cache);
+ batadv_tt_change_cache = NULL;
+err_tt_orig_destroy:
+ kmem_cache_destroy(batadv_tt_orig_cache);
+ batadv_tt_orig_cache = NULL;
+err_tt_tg_destroy:
+ kmem_cache_destroy(batadv_tg_cache);
+ batadv_tg_cache = NULL;
+err_tt_tl_destroy:
+ kmem_cache_destroy(batadv_tl_cache);
+ batadv_tl_cache = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * batadv_tt_cache_destroy - Destroy tt memory object cache
+ */
+void batadv_tt_cache_destroy(void)
+{
+ kmem_cache_destroy(batadv_tl_cache);
+ kmem_cache_destroy(batadv_tg_cache);
+ kmem_cache_destroy(batadv_tt_orig_cache);
+ kmem_cache_destroy(batadv_tt_change_cache);
+ kmem_cache_destroy(batadv_tt_req_cache);
+ kmem_cache_destroy(batadv_tt_roam_cache);
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 7c7e2c006bfe..783fdba84db2 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -22,8 +22,10 @@
#include <linux/types.h>
+struct netlink_callback;
struct net_device;
struct seq_file;
+struct sk_buff;
int batadv_tt_init(struct batadv_priv *bat_priv);
bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
@@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv,
const char *message, bool roaming);
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
s32 match_vid, const char *message);
@@ -59,4 +63,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid);
+int batadv_tt_cache_init(void);
+void batadv_tt_cache_destroy(void);
+
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 3d1cf0fb112d..77654f055f24 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -257,8 +257,13 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
batadv_tvlv_container_remove(bat_priv, tvlv_old);
+
+ kref_get(&tvlv_new->refcount);
hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+
+ /* don't return reference to new tvlv_container */
+ batadv_tvlv_container_put(tvlv_new);
}
/**
@@ -542,8 +547,12 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
INIT_HLIST_NODE(&tvlv_handler->list);
spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ kref_get(&tvlv_handler->refcount);
hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+
+ /* don't return reference to new tvlv_handler */
+ batadv_tvlv_handler_put(tvlv_handler);
}
/**
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a64522c3b45d..b3dd1a381aad 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,6 +28,7 @@
#include <linux/if_ether.h>
#include <linux/kref.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/sched.h> /* for linux/wait.h */
#include <linux/spinlock.h>
#include <linux/types.h>
@@ -132,7 +133,6 @@ struct batadv_hard_iface_bat_v {
* @rcu: struct used for freeing in an RCU-safe manner
* @bat_iv: per hard-interface B.A.T.M.A.N. IV data
* @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @cleanup_work: work queue callback item for hard-interface deinit
* @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
* @neigh_list: list of unique single hop neighbors via this interface
* @neigh_list_lock: lock protecting neigh_list
@@ -152,7 +152,6 @@ struct batadv_hard_iface {
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
struct batadv_hard_iface_bat_v bat_v;
#endif
- struct work_struct cleanup_work;
struct dentry *debug_dir;
struct hlist_head neigh_list;
/* neigh_list_lock protects: neigh_list */
@@ -1015,7 +1014,6 @@ struct batadv_priv_bat_v {
* @forw_bcast_list_lock: lock protecting forw_bcast_list
* @tp_list_lock: spinlock protecting @tp_list
* @orig_work: work queue callback item for orig node purging
- * @cleanup_work: work queue callback item for soft-interface deinit
* @primary_if: one of the hard-interfaces assigned to this mesh interface
* becomes the primary interface
* @algo_ops: routing algorithm used by this mesh interface
@@ -1074,7 +1072,6 @@ struct batadv_priv {
spinlock_t tp_list_lock; /* protects tp_list */
atomic_t tp_num;
struct delayed_work orig_work;
- struct work_struct cleanup_work;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
struct batadv_algo_ops *algo_ops;
struct hlist_head softif_vlan_list;
@@ -1379,6 +1376,7 @@ struct batadv_skb_cb {
* locally generated packet
* @if_outgoing: packet where the packet should be sent to, or NULL if
* unspecified
+ * @queue_left: The queue (counter) this packet was applied to
*/
struct batadv_forw_packet {
struct hlist_node list;
@@ -1391,11 +1389,13 @@ struct batadv_forw_packet {
struct delayed_work delayed_work;
struct batadv_hard_iface *if_incoming;
struct batadv_hard_iface *if_outgoing;
+ atomic_t *queue_left;
};
/**
* struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
* @activate: start routing mechanisms when hard-interface is brought up
+ * (optional)
* @enable: init routing info when hard-interface is enabled
* @disable: de-init routing info when hard-interface is disabled
* @update_mac: (re-)init mac addresses of the protocol information
@@ -1413,11 +1413,13 @@ struct batadv_algo_iface_ops {
/**
* struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
* @hardif_init: called on creation of single hop entry
+ * (optional)
* @cmp: compare the metrics of two neighbors for their respective outgoing
* interfaces
* @is_similar_or_better: check if neigh1 is equally similar or better than
* neigh2 for their respective outgoing interface from the metric prospective
* @print: print the single hop neighbor list (optional)
+ * @dump: dump neighbors to a netlink socket (optional)
*/
struct batadv_algo_neigh_ops {
void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
@@ -1429,26 +1431,64 @@ struct batadv_algo_neigh_ops {
struct batadv_hard_iface *if_outgoing1,
struct batadv_neigh_node *neigh2,
struct batadv_hard_iface *if_outgoing2);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+#endif
+ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *priv,
+ struct batadv_hard_iface *hard_iface);
};
/**
* struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
* @free: free the resources allocated by the routing algorithm for an orig_node
- * object
+ * object (optional)
* @add_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to a new hard-interface being added into the mesh
+ * orig_node due to a new hard-interface being added into the mesh (optional)
* @del_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to an hard-interface being removed from the mesh
+ * orig_node due to an hard-interface being removed from the mesh (optional)
* @print: print the originator table (optional)
+ * @dump: dump originators to a netlink socket (optional)
*/
struct batadv_algo_orig_ops {
void (*free)(struct batadv_orig_node *orig_node);
int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
int del_if_num);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
void (*print)(struct batadv_priv *priv, struct seq_file *seq,
struct batadv_hard_iface *hard_iface);
+#endif
+ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *priv,
+ struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @store_sel_class: parse and stores a new GW selection class (optional)
+ * @show_sel_class: prints the current GW selection class (optional)
+ * @get_best_gw_node: select the best GW from the list of available nodes
+ * (optional)
+ * @is_eligible: check if a newly discovered GW is a potential candidate for
+ * the election as best GW (optional)
+ * @print: print the gateway table (optional)
+ * @dump: dump gateways to a netlink socket (optional)
+ */
+struct batadv_algo_gw_ops {
+ ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
+ size_t count);
+ ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
+ struct batadv_gw_node *(*get_best_gw_node)
+ (struct batadv_priv *bat_priv);
+ bool (*is_eligible)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *curr_gw_orig,
+ struct batadv_orig_node *orig_node);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+#endif
+ void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct batadv_priv *priv);
};
/**
@@ -1458,6 +1498,7 @@ struct batadv_algo_orig_ops {
* @iface: callbacks related to interface handling
* @neigh: callbacks related to neighbors handling
* @orig: callbacks related to originators handling
+ * @gw: callbacks related to GW mode
*/
struct batadv_algo_ops {
struct hlist_node list;
@@ -1465,6 +1506,7 @@ struct batadv_algo_ops {
struct batadv_algo_iface_ops iface;
struct batadv_algo_neigh_ops neigh;
struct batadv_algo_orig_ops orig;
+ struct batadv_algo_gw_ops gw;
};
/**
@@ -1564,4 +1606,17 @@ enum batadv_tvlv_handler_flags {
BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
};
+/**
+ * struct batadv_store_mesh_work - Work queue item to detach add/del interface
+ * from sysfs locks
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @soft_iface_name: name of soft-interface to modify
+ * @work: work queue item
+ */
+struct batadv_store_mesh_work {
+ struct net_device *net_dev;
+ char soft_iface_name[IFNAMSIZ];
+ struct work_struct work;
+};
+
#endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index a1cda5d4718d..0aefc011b668 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
+bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
+
obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 63a83d8d7da3..32a02de39cd2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
+ br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+ nbp_switchdev_allowed_egress(p, skb);
}
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f2fede05d32c..1da3221845f1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err5;
+ err = nbp_switchdev_mark_set(p);
+ if (err)
+ goto err6;
+
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
err = nbp_vlan_init(p);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
- goto err6;
+ goto err7;
}
spin_lock_bh(&br->lock);
@@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return 0;
-err6:
+err7:
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
+err6:
netdev_upper_dev_unlink(dev, br->dev);
-
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8e486203d133..3132cfc80e9d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
goto out;
+ nbp_switchdev_frame_mark(p, skb);
+
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f2a29e467e78..190a5bc00f4a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1245,14 +1245,30 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return 0;
}
-static size_t bridge_get_linkxstats_size(const struct net_device *dev)
+static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
{
- struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_port *p = NULL;
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
+ struct net_bridge *br;
int numvls = 0;
- vg = br_vlan_group(br);
+ switch (attr) {
+ case IFLA_STATS_LINK_XSTATS:
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ break;
+ case IFLA_STATS_LINK_XSTATS_SLAVE:
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return 0;
+ br = p->br;
+ vg = nbp_vlan_group(p);
+ break;
+ default:
+ return 0;
+ }
+
if (vg) {
/* we need to count all, even placeholder entries */
list_for_each_entry(v, &vg->vlan_list, vlist)
@@ -1264,45 +1280,42 @@ static size_t bridge_get_linkxstats_size(const struct net_device *dev)
nla_total_size(0);
}
-static size_t brport_get_linkxstats_size(const struct net_device *dev)
-{
- return nla_total_size(sizeof(struct br_mcast_stats)) +
- nla_total_size(0);
-}
-
-static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
+static int br_fill_linkxstats(struct sk_buff *skb,
+ const struct net_device *dev,
+ int *prividx, int attr)
{
- size_t retsize = 0;
+ struct nlattr *nla __maybe_unused;
+ struct net_bridge_port *p = NULL;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct net_bridge *br;
+ struct nlattr *nest;
+ int vl_idx = 0;
switch (attr) {
case IFLA_STATS_LINK_XSTATS:
- retsize = bridge_get_linkxstats_size(dev);
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
break;
case IFLA_STATS_LINK_XSTATS_SLAVE:
- retsize = brport_get_linkxstats_size(dev);
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return 0;
+ br = p->br;
+ vg = nbp_vlan_group(p);
break;
+ default:
+ return -EINVAL;
}
- return retsize;
-}
-
-static int bridge_fill_linkxstats(struct sk_buff *skb,
- const struct net_device *dev,
- int *prividx)
-{
- struct net_bridge *br = netdev_priv(dev);
- struct nlattr *nla __maybe_unused;
- struct net_bridge_vlan_group *vg;
- struct net_bridge_vlan *v;
- struct nlattr *nest;
- int vl_idx = 0;
-
nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
if (!nest)
return -EMSGSIZE;
- vg = br_vlan_group(br);
if (vg) {
+ u16 pvid;
+
+ pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
struct bridge_vlan_xstats vxi;
struct br_vlan_stats stats;
@@ -1311,6 +1324,9 @@ static int bridge_fill_linkxstats(struct sk_buff *skb,
continue;
memset(&vxi, 0, sizeof(vxi));
vxi.vid = v->vid;
+ vxi.flags = v->flags;
+ if (v->vid == pvid)
+ vxi.flags |= BRIDGE_VLAN_INFO_PVID;
br_vlan_get_stats(v, &stats);
vxi.rx_bytes = stats.rx_bytes;
vxi.rx_packets = stats.rx_packets;
@@ -1329,7 +1345,7 @@ static int bridge_fill_linkxstats(struct sk_buff *skb,
BRIDGE_XSTATS_PAD);
if (!nla)
goto nla_put_failure;
- br_multicast_get_stats(br, NULL, nla_data(nla));
+ br_multicast_get_stats(br, p, nla_data(nla));
}
#endif
nla_nest_end(skb, nest);
@@ -1344,52 +1360,6 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int brport_fill_linkxstats(struct sk_buff *skb,
- const struct net_device *dev,
- int *prividx)
-{
- struct net_bridge_port *p = br_port_get_rtnl(dev);
- struct nlattr *nla __maybe_unused;
- struct nlattr *nest;
-
- if (!p)
- return 0;
-
- nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
- if (!nest)
- return -EMSGSIZE;
-#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
- sizeof(struct br_mcast_stats),
- BRIDGE_XSTATS_PAD);
- if (!nla) {
- nla_nest_end(skb, nest);
- return -EMSGSIZE;
- }
- br_multicast_get_stats(p->br, p, nla_data(nla));
-#endif
- nla_nest_end(skb, nest);
-
- return 0;
-}
-
-static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
- int *prividx, int attr)
-{
- int ret = -EINVAL;
-
- switch (attr) {
- case IFLA_STATS_LINK_XSTATS:
- ret = bridge_fill_linkxstats(skb, dev, prividx);
- break;
- case IFLA_STATS_LINK_XSTATS_SLAVE:
- ret = brport_fill_linkxstats(skb, dev, prividx);
- break;
- }
-
- return ret;
-}
-
static struct rtnl_af_ops br_af_ops __read_mostly = {
.family = AF_BRIDGE,
.get_link_af_size = br_get_link_af_size_filtered,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aac2a6e6b008..2379b2b865c9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -251,6 +251,9 @@ struct net_bridge_port
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
};
#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -359,6 +362,11 @@ struct net_bridge
struct timer_list gc_timer;
struct kobject *ifobj;
u32 auto_cnt;
+
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
+
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
u8 vlan_enabled;
@@ -381,6 +389,10 @@ struct br_input_skb_cb {
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool vlan_filtered;
#endif
+
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; }
static inline void br_sysfs_delbr(struct net_device *dev) { return; }
#endif /* CONFIG_SYSFS */
+/* br_switchdev.c */
+#ifdef CONFIG_NET_SWITCHDEV
+int nbp_switchdev_mark_set(struct net_bridge_port *p);
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb);
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb);
+#else
+static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+ return 0;
+}
+
+static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+}
+
+static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ return true;
+}
+#endif /* CONFIG_NET_SWITCHDEV */
+
#endif
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
new file mode 100644
index 000000000000..f4097b900de1
--- /dev/null
+++ b/net/bridge/br_switchdev.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
+{
+ struct net_bridge_port *p;
+
+ /* dev is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (switchdev_port_same_parent_id(dev, p->dev))
+ return p->offload_fwd_mark;
+ }
+
+ return ++br->offload_fwd_mark;
+}
+
+int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = switchdev_port_attr_get(p->dev, &attr);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+
+ return 0;
+}
+
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
+ BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+}
+
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ return !skb->offload_fwd_mark ||
+ BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+}
diff --git a/net/core/dev.c b/net/core/dev.c
index dd6ce598de89..1d5c6dda1988 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
-#ifdef CONFIG_NET_SWITCHDEV
- /* Don't forward if offload device already forwarded */
- if (skb->offload_fwd_mark &&
- skb->offload_fwd_mark == dev->offload_fwd_mark) {
- consume_skb(skb);
- rc = NET_XMIT_SUCCESS;
- goto out;
- }
-#endif
-
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -4292,15 +4282,25 @@ int netif_receive_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_receive_skb);
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+struct flush_work {
+ struct net_device *dev;
+ struct work_struct work;
+};
+
+DEFINE_PER_CPU(struct flush_work, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
{
- struct net_device *dev = arg;
- struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ struct flush_work *flush = container_of(work, typeof(*flush), work);
+ struct net_device *dev = flush->dev;
struct sk_buff *skb, *tmp;
+ struct softnet_data *sd;
+ local_bh_disable();
+ sd = this_cpu_ptr(&softnet_data);
+
+ local_irq_disable();
rps_lock(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
@@ -4310,6 +4310,7 @@ static void flush_backlog(void *arg)
}
}
rps_unlock(sd);
+ local_irq_enable();
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
@@ -4318,6 +4319,27 @@ static void flush_backlog(void *arg)
input_queue_head_incr(sd);
}
}
+ local_bh_enable();
+}
+
+static void flush_all_backlogs(struct net_device *dev)
+{
+ unsigned int cpu;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu) {
+ struct flush_work *flush = per_cpu_ptr(&flush_works, cpu);
+
+ INIT_WORK(&flush->work, flush_backlog);
+ flush->dev = dev;
+ queue_work_on(cpu, system_highpri_wq, &flush->work);
+ }
+
+ for_each_online_cpu(cpu)
+ flush_work(&per_cpu_ptr(&flush_works, cpu)->work);
+
+ put_online_cpus();
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -4805,8 +4827,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
static int process_backlog(struct napi_struct *napi, int quota)
{
- int work = 0;
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+ bool again = true;
+ int work = 0;
/* Check if we have pending ipi, its better to send them now,
* not waiting net_rx_action() end.
@@ -4817,23 +4840,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
napi->weight = weight_p;
- local_irq_disable();
- while (1) {
+ while (again) {
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
rcu_read_lock();
- local_irq_enable();
__netif_receive_skb(skb);
rcu_read_unlock();
- local_irq_disable();
input_queue_head_incr(sd);
- if (++work >= quota) {
- local_irq_enable();
+ if (++work >= quota)
return work;
- }
+
}
+ local_irq_disable();
rps_lock(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
@@ -4845,16 +4865,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
* and we dont need an smp_mb() memory barrier.
*/
napi->state = 0;
- rps_unlock(sd);
-
- break;
+ again = false;
+ } else {
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
}
-
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
rps_unlock(sd);
+ local_irq_enable();
}
- local_irq_enable();
return work;
}
@@ -6707,7 +6725,7 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
- on_each_cpu(flush_backlog, dev, 1);
+ flush_all_backlogs(dev);
}
synchronize_net();
@@ -7625,6 +7643,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->all_adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+ hash_init(dev->qdisc_hash);
+#endif
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index cb06aceb512a..a83766be1ad2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1350,14 +1350,18 @@ struct bpf_scratchpad {
static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ return skb_ensure_writable(skb, write_len);
+}
+
static inline int bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
- int err;
+ int err = __bpf_try_make_writable(skb, write_len);
- err = skb_ensure_writable(skb, write_len);
bpf_compute_data_end(skb);
-
return err;
}
@@ -1976,8 +1980,8 @@ static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
u32 pkt_type = r2;
/* We only allow a restricted subset to be changed for now. */
- if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
- pkt_type > PACKET_OTHERHOST))
+ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+ !skb_pkt_type_ok(pkt_type)))
return -EINVAL;
skb->pkt_type = pkt_type;
@@ -1992,6 +1996,92 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+ u32 min_len = skb_network_offset(skb);
+
+ if (skb_transport_header_was_set(skb))
+ min_len = skb_transport_offset(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ min_len = skb_checksum_start_offset(skb) +
+ skb->csum_offset + sizeof(__sum16);
+ return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+ return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+ 65536;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ unsigned int old_len = skb->len;
+ int ret;
+
+ ret = __skb_grow_rcsum(skb, new_len);
+ if (!ret)
+ memset(skb->data + old_len, 0, new_len - old_len);
+ return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ return __skb_trim_rcsum(skb, new_len);
+}
+
+static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *)(long) r1;
+ u32 max_len = __bpf_skb_max_len(skb);
+ u32 min_len = __bpf_skb_min_len(skb);
+ u32 new_len = (u32) r2;
+ int ret;
+
+ if (unlikely(flags || new_len > max_len || new_len < min_len))
+ return -EINVAL;
+ if (skb->encapsulation)
+ return -ENOTSUPP;
+
+ /* The basic idea of this helper is that it's performing the
+ * needed work to either grow or trim an skb, and eBPF program
+ * rewrites the rest via helpers like bpf_skb_store_bytes(),
+ * bpf_lX_csum_replace() and others rather than passing a raw
+ * buffer here. This one is a slow path helper and intended
+ * for replies with control messages.
+ *
+ * Like in bpf_skb_change_proto(), we want to keep this rather
+ * minimal and without protocol specifics so that we are able
+ * to separate concerns as in bpf_skb_store_bytes() should only
+ * be the one responsible for writing buffers.
+ *
+ * It's really expected to be a slow path operation here for
+ * control message replies, so we're implicitly linearizing,
+ * uncloning and drop offloads from the skb by this.
+ */
+ ret = __bpf_try_make_writable(skb, skb->len);
+ if (!ret) {
+ if (new_len > skb->len)
+ ret = bpf_skb_grow_rcsum(skb, new_len);
+ else if (new_len < skb->len)
+ ret = bpf_skb_trim_rcsum(skb, new_len);
+ if (!ret && skb_is_gso(skb))
+ skb_gso_reset(skb);
+ }
+
+ bpf_compute_data_end(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+ .func = bpf_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
bool bpf_helper_changes_skb_data(void *func)
{
if (func == bpf_skb_vlan_push)
@@ -2002,6 +2092,8 @@ bool bpf_helper_changes_skb_data(void *func)
return true;
if (func == bpf_skb_change_proto)
return true;
+ if (func == bpf_skb_change_tail)
+ return true;
if (func == bpf_l3_csum_replace)
return true;
if (func == bpf_l4_csum_replace)
@@ -2282,7 +2374,6 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
}
}
-#ifdef CONFIG_SOCK_CGROUP_DATA
static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *)(long)r1;
@@ -2303,7 +2394,7 @@ static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
if (unlikely(!cgrp))
return -EAGAIN;
- return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+ return sk_under_cgroup_hierarchy(sk, cgrp);
}
static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
@@ -2314,7 +2405,41 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
-#endif
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+ unsigned long off, unsigned long len)
+{
+ memcpy(dst_buff, src_buff + off, len);
+ return 0;
+}
+
+static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
+ u64 meta_size)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)(long) r1;
+ struct bpf_map *map = (struct bpf_map *)(long) r2;
+ u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+ void *meta = (void *)(long) r4;
+
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+ return -EINVAL;
+ if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ return -EFAULT;
+
+ return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+ bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+ .func = bpf_xdp_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_STACK,
+ .arg5_type = ARG_CONST_STACK_SIZE,
+};
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
@@ -2368,6 +2493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_change_proto_proto;
case BPF_FUNC_skb_change_type:
return &bpf_skb_change_type_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
@@ -2386,10 +2513,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
-#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
-#endif
default:
return sk_filter_func_proto(func_id);
}
@@ -2398,7 +2523,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
xdp_func_proto(enum bpf_func_id func_id)
{
- return sk_filter_func_proto(func_id);
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_xdp_event_output_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
}
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f61c5e..a2879c0f6c4c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -116,13 +118,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
+ struct flow_dissector_key_vlan *key_vlan;
struct flow_dissector_key_keyid *key_keyid;
+ bool skip_vlan = false;
u8 ip_proto = 0;
bool ret = false;
if (!data) {
data = skb->data;
- proto = skb->protocol;
+ proto = skb_vlan_tag_present(skb) ?
+ skb->vlan_proto : skb->protocol;
nhoff = skb_network_offset(skb);
hlen = skb_headlen(skb);
}
@@ -241,23 +246,45 @@ ipv6:
case htons(ETH_P_8021AD):
case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
- struct vlan_hdr _vlan;
- vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
- if (!vlan)
- goto out_bad;
+ if (skb_vlan_tag_present(skb))
+ proto = skb->protocol;
+
+ if (!skb_vlan_tag_present(skb) ||
+ proto == cpu_to_be16(ETH_P_8021Q) ||
+ proto == cpu_to_be16(ETH_P_8021AD)) {
+ struct vlan_hdr _vlan;
+ vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
+ data, hlen, &_vlan);
+ if (!vlan)
+ goto out_bad;
+ proto = vlan->h_vlan_encapsulated_proto;
+ nhoff += sizeof(*vlan);
+ if (skip_vlan)
+ goto again;
+ }
+
+ skip_vlan = true;
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_VLANID)) {
- key_tags = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_VLANID,
+ FLOW_DISSECTOR_KEY_VLAN)) {
+ key_vlan = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
target_container);
- key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+ if (skb_vlan_tag_present(skb)) {
+ key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
+ key_vlan->vlan_priority =
+ (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+ } else {
+ key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
+ VLAN_VID_MASK;
+ key_vlan->vlan_priority =
+ (ntohs(vlan->h_vlan_TCI) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
}
- proto = vlan->h_vlan_encapsulated_proto;
- nhoff += sizeof(*vlan);
goto again;
}
case htons(ETH_P_PPP_SES): {
@@ -338,32 +365,42 @@ mpls:
ip_proto_again:
switch (ip_proto) {
case IPPROTO_GRE: {
- struct gre_hdr {
- __be16 flags;
- __be16 proto;
- } *hdr, _hdr;
+ struct gre_base_hdr *hdr, _hdr;
+ u16 gre_ver;
+ int offset = 0;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
goto out_bad;
- /*
- * Only look inside GRE if version zero and no
- * routing
- */
- if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
break;
- proto = hdr->proto;
- nhoff += 4;
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ break;
+
+ proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ break;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
if (hdr->flags & GRE_CSUM)
- nhoff += 4;
+ offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+ sizeof(((struct gre_full_hdr *)0)->reserved1);
+
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
__be32 _keyid;
- keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+ keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
data, hlen, &_keyid);
-
if (!keyid)
goto out_bad;
@@ -372,32 +409,65 @@ ip_proto_again:
key_keyid = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_GRE_KEYID,
target_container);
- key_keyid->keyid = *keyid;
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- nhoff += 4;
+ offset += sizeof(((struct gre_full_hdr *)0)->key);
}
+
if (hdr->flags & GRE_SEQ)
- nhoff += 4;
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
+ offset += sizeof(((struct pptp_gre_header *)0)->seq);
+
+ if (gre_ver == 0) {
+ if (proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, nhoff + offset,
+ sizeof(_eth),
+ data, hlen, &_eth);
+ if (!eth)
+ goto out_bad;
+ proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = (nhoff + offset);
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+ ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+ sizeof(_ppp_hdr), _ppp_hdr);
+ if (!ppp_hdr)
goto out_bad;
- proto = eth->h_proto;
- nhoff += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = nhoff;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
}
+ nhoff += offset;
key_control->flags |= FLOW_DIS_ENCAPSULATION;
if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
goto out_good;
@@ -872,8 +942,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, ports),
},
{
- .key_id = FLOW_DISSECTOR_KEY_VLANID,
- .offset = offsetof(struct flow_keys, tags),
+ .key_id = FLOW_DISSECTOR_KEY_VLAN,
+ .offset = offsetof(struct flow_keys, vlan),
},
{
.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cf26e04c4046..2ae929f9bd06 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
} else
goto out;
} else {
- if (lladdr == neigh->ha && new == NUD_STALE)
+ if (lladdr == neigh->ha && new == NUD_STALE &&
+ !(flags & NEIGH_UPDATE_F_ADMIN))
new = old;
}
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..1fe58167d39a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -37,6 +37,8 @@ struct net init_net = {
};
EXPORT_SYMBOL(init_net);
+static bool init_net_initialized;
+
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -750,6 +752,8 @@ static int __init net_ns_init(void)
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
+ init_net_initialized = true;
+
rtnl_lock();
list_add_tail_rcu(&init_net.list, &net_namespace_list);
rtnl_unlock();
@@ -811,15 +815,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
+ if (!init_net_initialized) {
+ list_add_tail(&ops->list, list);
+ return 0;
+ }
+
return ops_init(ops, &init_net);
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
- LIST_HEAD(net_exit_list);
- list_add(&init_net.exit_list, &net_exit_list);
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ if (!init_net_initialized) {
+ list_del(&ops->list);
+ } else {
+ LIST_HEAD(net_exit_list);
+ list_add(&init_net.exit_list, &net_exit_list);
+ ops_exit_list(ops, &net_exit_list);
+ ops_free_list(ops, &net_exit_list);
+ }
}
#endif /* CONFIG_NET_NS */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 189cc78c77eb..318fc5231b2b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -704,6 +704,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
} else if (i == RTAX_FEATURES - 1) {
u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
+ if (!user_features)
+ continue;
BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
if (nla_put_u32(skb, i + 1, user_features))
goto nla_put_failure;
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b60223..51a730485649 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
}
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
-{
- unsigned long nulls1, nulls2;
-
- nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
- nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
- if (nulls1 > nulls2)
- swap(nulls1, nulls2);
-
- if (nulls1 != 0)
- memset((char *)sk, 0, nulls1);
- memset((char *)sk + nulls1 + sizeof(void *), 0,
- nulls2 - nulls1 - sizeof(void *));
- memset((char *)sk + nulls2 + sizeof(void *), 0,
- size - nulls2 - sizeof(void *));
-}
-EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
-
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
@@ -1344,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
- if (priority & __GFP_ZERO) {
- if (prot->clear_sk)
- prot->clear_sk(sk, prot->obj_size);
- else
- sk_prot_clear_nulls(sk, prot->obj_size);
- }
+ if (priority & __GFP_ZERO)
+ sk_prot_clear_nulls(sk, prot->obj_size);
} else
sk = kmalloc(prot->obj_size, priority);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 7e68bc6bc853..d8d267e9a872 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -61,27 +61,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
static DEFINE_MUTEX(dsa_switch_drivers_mutex);
static LIST_HEAD(dsa_switch_drivers);
-void register_switch_driver(struct dsa_switch_driver *drv)
+void register_switch_driver(struct dsa_switch_ops *ops)
{
mutex_lock(&dsa_switch_drivers_mutex);
- list_add_tail(&drv->list, &dsa_switch_drivers);
+ list_add_tail(&ops->list, &dsa_switch_drivers);
mutex_unlock(&dsa_switch_drivers_mutex);
}
EXPORT_SYMBOL_GPL(register_switch_driver);
-void unregister_switch_driver(struct dsa_switch_driver *drv)
+void unregister_switch_driver(struct dsa_switch_ops *ops)
{
mutex_lock(&dsa_switch_drivers_mutex);
- list_del_init(&drv->list);
+ list_del_init(&ops->list);
mutex_unlock(&dsa_switch_drivers_mutex);
}
EXPORT_SYMBOL_GPL(unregister_switch_driver);
-static struct dsa_switch_driver *
+static struct dsa_switch_ops *
dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
const char **_name, void **priv)
{
- struct dsa_switch_driver *ret;
+ struct dsa_switch_ops *ret;
struct list_head *list;
const char *name;
@@ -90,13 +90,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
mutex_lock(&dsa_switch_drivers_mutex);
list_for_each(list, &dsa_switch_drivers) {
- struct dsa_switch_driver *drv;
+ struct dsa_switch_ops *ops;
- drv = list_entry(list, struct dsa_switch_driver, list);
+ ops = list_entry(list, struct dsa_switch_ops, list);
- name = drv->probe(parent, host_dev, sw_addr, priv);
+ name = ops->probe(parent, host_dev, sw_addr, priv);
if (name != NULL) {
- ret = drv;
+ ret = ops;
break;
}
}
@@ -117,7 +117,7 @@ static ssize_t temp1_input_show(struct device *dev,
struct dsa_switch *ds = dev_get_drvdata(dev);
int temp, ret;
- ret = ds->drv->get_temp(ds, &temp);
+ ret = ds->ops->get_temp(ds, &temp);
if (ret < 0)
return ret;
@@ -131,7 +131,7 @@ static ssize_t temp1_max_show(struct device *dev,
struct dsa_switch *ds = dev_get_drvdata(dev);
int temp, ret;
- ret = ds->drv->get_temp_limit(ds, &temp);
+ ret = ds->ops->get_temp_limit(ds, &temp);
if (ret < 0)
return ret;
@@ -149,7 +149,7 @@ static ssize_t temp1_max_store(struct device *dev,
if (ret < 0)
return ret;
- ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
+ ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
if (ret < 0)
return ret;
@@ -164,7 +164,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev,
bool alarm;
int ret;
- ret = ds->drv->get_temp_alarm(ds, &alarm);
+ ret = ds->ops->get_temp_alarm(ds, &alarm);
if (ret < 0)
return ret;
@@ -184,15 +184,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
{
struct device *dev = container_of(kobj, struct device, kobj);
struct dsa_switch *ds = dev_get_drvdata(dev);
- struct dsa_switch_driver *drv = ds->drv;
+ struct dsa_switch_ops *ops = ds->ops;
umode_t mode = attr->mode;
if (index == 1) {
- if (!drv->get_temp_limit)
+ if (!ops->get_temp_limit)
mode = 0;
- else if (!drv->set_temp_limit)
+ else if (!ops->set_temp_limit)
mode &= ~S_IWUSR;
- } else if (index == 2 && !drv->get_temp_alarm) {
+ } else if (index == 2 && !ops->get_temp_alarm) {
mode = 0;
}
return mode;
@@ -228,8 +228,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
genphy_config_init(phydev);
genphy_read_status(phydev);
- if (ds->drv->adjust_link)
- ds->drv->adjust_link(ds, port, phydev);
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
}
return 0;
@@ -303,7 +303,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
{
- struct dsa_switch_driver *drv = ds->drv;
+ struct dsa_switch_ops *ops = ds->ops;
struct dsa_switch_tree *dst = ds->dst;
struct dsa_chip_data *cd = ds->cd;
bool valid_name_found = false;
@@ -354,7 +354,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* switch.
*/
if (dst->cpu_switch == index) {
- dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol);
+ enum dsa_tag_protocol tag_protocol;
+
+ tag_protocol = ops->get_tag_protocol(ds);
+ dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(dst->tag_ops)) {
ret = PTR_ERR(dst->tag_ops);
goto out;
@@ -368,15 +371,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
/*
* Do basic register setup.
*/
- ret = drv->setup(ds);
+ ret = ops->setup(ds);
if (ret < 0)
goto out;
- ret = drv->set_addr(ds, dst->master_netdev->dev_addr);
+ ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
if (ret < 0)
goto out;
- if (!ds->slave_mii_bus && drv->phy_read) {
+ if (!ds->slave_mii_bus && ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(parent);
if (!ds->slave_mii_bus) {
ret = -ENOMEM;
@@ -423,7 +426,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* register with hardware monitoring subsystem.
* Treat registration error as non-fatal and ignore it.
*/
- if (drv->get_temp) {
+ if (ops->get_temp) {
const char *netname = netdev_name(dst->master_netdev);
char hname[IFNAMSIZ + 1];
int i, j;
@@ -454,7 +457,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
struct device *parent, struct device *host_dev)
{
struct dsa_chip_data *cd = dst->pd->chip + index;
- struct dsa_switch_driver *drv;
+ struct dsa_switch_ops *ops;
struct dsa_switch *ds;
int ret;
const char *name;
@@ -463,8 +466,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
/*
* Probe for switch model.
*/
- drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
- if (drv == NULL) {
+ ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
+ if (!ops) {
netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
index);
return ERR_PTR(-EINVAL);
@@ -483,7 +486,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
ds->dst = dst;
ds->index = index;
ds->cd = cd;
- ds->drv = drv;
+ ds->ops = ops;
ds->priv = priv;
ds->dev = parent;
@@ -538,12 +541,12 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
ds->dsa_port_mask |= ~(1 << port);
}
- if (ds->slave_mii_bus && ds->drv->phy_read)
+ if (ds->slave_mii_bus && ds->ops->phy_read)
mdiobus_unregister(ds->slave_mii_bus);
}
#ifdef CONFIG_PM_SLEEP
-static int dsa_switch_suspend(struct dsa_switch *ds)
+int dsa_switch_suspend(struct dsa_switch *ds)
{
int i, ret = 0;
@@ -557,18 +560,19 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
return ret;
}
- if (ds->drv->suspend)
- ret = ds->drv->suspend(ds);
+ if (ds->ops->suspend)
+ ret = ds->ops->suspend(ds);
return ret;
}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-static int dsa_switch_resume(struct dsa_switch *ds)
+int dsa_switch_resume(struct dsa_switch *ds)
{
int i, ret = 0;
- if (ds->drv->resume)
- ret = ds->drv->resume(ds);
+ if (ds->ops->resume)
+ ret = ds->ops->resume(ds);
if (ret)
return ret;
@@ -585,6 +589,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
return 0;
}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
#endif
/* platform driver init and cleanup *****************************************/
@@ -1086,7 +1091,6 @@ static int dsa_resume(struct device *d)
static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
static const struct of_device_id dsa_of_match_table[] = {
- { .compatible = "brcm,bcm7445-switch-v4.0" },
{ .compatible = "marvell,dsa", },
{}
};
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f30bad9678f0..8278385dcd21 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
int err;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
- * driver and before drv->setup() has run, since the switch drivers and
+ * driver and before ops->setup() has run, since the switch drivers and
* the slave MDIO bus driver rely on these values for probing PHY
* devices or not
*/
ds->phys_mii_mask = ds->enabled_port_mask;
- err = ds->drv->setup(ds);
+ err = ds->ops->setup(ds);
if (err < 0)
return err;
- err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
if (err < 0)
return err;
- err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+ err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
if (err < 0)
return err;
- if (!ds->slave_mii_bus && ds->drv->phy_read) {
+ if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
return -ENOMEM;
@@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
dsa_user_port_unapply(port, index, ds);
}
- if (ds->slave_mii_bus && ds->drv->phy_read)
+ if (ds->slave_mii_bus && ds->ops->phy_read)
mdiobus_unregister(ds->slave_mii_bus);
}
@@ -443,6 +443,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index,
struct dsa_switch_tree *dst,
struct dsa_switch *ds)
{
+ enum dsa_tag_protocol tag_protocol;
struct net_device *ethernet_dev;
struct device_node *ethernet;
@@ -465,7 +466,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index,
dst->cpu_port = index;
}
- dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol);
+ tag_protocol = ds->ops->get_tag_protocol(ds);
+ dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(dst->tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
return PTR_ERR(dst->tag_ops);
@@ -541,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
ds->ports[reg].dn = port;
- /* Initialize enabled_port_mask now for drv->setup()
+ /* Initialize enabled_port_mask now for ops->setup()
* to have access to a correct value, just like what
* net/dsa/dsa.c::dsa_switch_setup_one does.
*/
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index fc9196745225..9f6c2a20f6ff 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
struct dsa_switch *ds = bus->priv;
if (ds->phys_mii_mask & (1 << addr))
- return ds->drv->phy_read(ds, addr, reg);
+ return ds->ops->phy_read(ds, addr, reg);
return 0xffff;
}
@@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
struct dsa_switch *ds = bus->priv;
if (ds->phys_mii_mask & (1 << addr))
- return ds->drv->phy_write(ds, addr, reg, val);
+ return ds->ops->phy_write(ds, addr, reg, val);
return 0;
}
@@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- if (ds->drv->port_enable) {
- err = ds->drv->port_enable(ds, p->port, p->phy);
+ if (ds->ops->port_enable) {
+ err = ds->ops->port_enable(ds, p->port, p->phy);
if (err)
goto clear_promisc;
}
- if (ds->drv->port_stp_state_set)
- ds->drv->port_stp_state_set(ds, p->port, stp_state);
+ if (ds->ops->port_stp_state_set)
+ ds->ops->port_stp_state_set(ds, p->port, stp_state);
if (p->phy)
phy_start(p->phy);
@@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev)
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
- if (ds->drv->port_disable)
- ds->drv->port_disable(ds, p->port, p->phy);
+ if (ds->ops->port_disable)
+ ds->ops->port_disable(ds, p->port, p->phy);
- if (ds->drv->port_stp_state_set)
- ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
+ if (ds->ops->port_stp_state_set)
+ ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
return 0;
}
@@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
struct dsa_switch *ds = p->parent;
if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
+ if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
return -EOPNOTSUPP;
- return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
+ return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans);
}
- ds->drv->port_vlan_add(ds, p->port, vlan, trans);
+ ds->ops->port_vlan_add(ds, p->port, vlan, trans);
return 0;
}
@@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (!ds->drv->port_vlan_del)
+ if (!ds->ops->port_vlan_del)
return -EOPNOTSUPP;
- return ds->drv->port_vlan_del(ds, p->port, vlan);
+ return ds->ops->port_vlan_del(ds, p->port, vlan);
}
static int dsa_slave_port_vlan_dump(struct net_device *dev,
@@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->port_vlan_dump)
- return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
+ if (ds->ops->port_vlan_dump)
+ return ds->ops->port_vlan_dump(ds, p->port, vlan, cb);
return -EOPNOTSUPP;
}
@@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev,
struct dsa_switch *ds = p->parent;
if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add)
+ if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans);
+ return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans);
}
- ds->drv->port_fdb_add(ds, p->port, fdb, trans);
+ ds->ops->port_fdb_add(ds, p->port, fdb, trans);
return 0;
}
@@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev,
struct dsa_switch *ds = p->parent;
int ret = -EOPNOTSUPP;
- if (ds->drv->port_fdb_del)
- ret = ds->drv->port_fdb_del(ds, p->port, fdb);
+ if (ds->ops->port_fdb_del)
+ ret = ds->ops->port_fdb_del(ds, p->port, fdb);
return ret;
}
@@ -284,8 +284,8 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->port_fdb_dump)
- return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
+ if (ds->ops->port_fdb_dump)
+ return ds->ops->port_fdb_dump(ds, p->port, fdb, cb);
return -EOPNOTSUPP;
}
@@ -308,9 +308,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev,
struct dsa_switch *ds = p->parent;
if (switchdev_trans_ph_prepare(trans))
- return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP;
+ return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
- ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state);
+ ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state);
return 0;
}
@@ -326,8 +326,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
if (switchdev_trans_ph_prepare(trans))
return 0;
- if (ds->drv->port_vlan_filtering)
- return ds->drv->port_vlan_filtering(ds, p->port,
+ if (ds->ops->port_vlan_filtering)
+ return ds->ops->port_vlan_filtering(ds, p->port,
attr->u.vlan_filtering);
return 0;
@@ -365,8 +365,8 @@ static int dsa_slave_ageing_time(struct net_device *dev,
ds->ports[p->port].ageing_time = ageing_time;
ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
- if (ds->drv->set_ageing_time)
- return ds->drv->set_ageing_time(ds, ageing_time);
+ if (ds->ops->set_ageing_time)
+ return ds->ops->set_ageing_time(ds, ageing_time);
return 0;
}
@@ -481,8 +481,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev,
p->bridge_dev = br;
- if (ds->drv->port_bridge_join)
- ret = ds->drv->port_bridge_join(ds, p->port, br);
+ if (ds->ops->port_bridge_join)
+ ret = ds->ops->port_bridge_join(ds, p->port, br);
return ret == -EOPNOTSUPP ? 0 : ret;
}
@@ -493,16 +493,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev)
struct dsa_switch *ds = p->parent;
- if (ds->drv->port_bridge_leave)
- ds->drv->port_bridge_leave(ds, p->port);
+ if (ds->ops->port_bridge_leave)
+ ds->ops->port_bridge_leave(ds, p->port);
p->bridge_dev = NULL;
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
*/
- if (ds->drv->port_stp_state_set)
- ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
+ if (ds->ops->port_stp_state_set)
+ ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
}
static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -605,8 +605,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->get_regs_len)
- return ds->drv->get_regs_len(ds, p->port);
+ if (ds->ops->get_regs_len)
+ return ds->ops->get_regs_len(ds, p->port);
return -EOPNOTSUPP;
}
@@ -617,8 +617,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->get_regs)
- ds->drv->get_regs(ds, p->port, regs, _p);
+ if (ds->ops->get_regs)
+ ds->ops->get_regs(ds, p->port, regs, _p);
}
static int dsa_slave_nway_reset(struct net_device *dev)
@@ -651,8 +651,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
- if (ds->drv->get_eeprom_len)
- return ds->drv->get_eeprom_len(ds);
+ if (ds->ops->get_eeprom_len)
+ return ds->ops->get_eeprom_len(ds);
return 0;
}
@@ -663,8 +663,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->get_eeprom)
- return ds->drv->get_eeprom(ds, eeprom, data);
+ if (ds->ops->get_eeprom)
+ return ds->ops->get_eeprom(ds, eeprom, data);
return -EOPNOTSUPP;
}
@@ -675,8 +675,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->set_eeprom)
- return ds->drv->set_eeprom(ds, eeprom, data);
+ if (ds->ops->set_eeprom)
+ return ds->ops->set_eeprom(ds, eeprom, data);
return -EOPNOTSUPP;
}
@@ -694,8 +694,8 @@ static void dsa_slave_get_strings(struct net_device *dev,
strncpy(data + len, "tx_bytes", len);
strncpy(data + 2 * len, "rx_packets", len);
strncpy(data + 3 * len, "rx_bytes", len);
- if (ds->drv->get_strings != NULL)
- ds->drv->get_strings(ds, p->port, data + 4 * len);
+ if (ds->ops->get_strings)
+ ds->ops->get_strings(ds, p->port, data + 4 * len);
}
}
@@ -714,8 +714,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
}
- if (ds->drv->get_ethtool_stats)
- ds->drv->get_ethtool_stats(ds, cpu_port, data + count);
+ if (ds->ops->get_ethtool_stats)
+ ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
}
static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
@@ -727,8 +727,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
if (dst->master_ethtool_ops.get_sset_count)
count += dst->master_ethtool_ops.get_sset_count(dev, sset);
- if (sset == ETH_SS_STATS && ds->drv->get_sset_count)
- count += ds->drv->get_sset_count(ds);
+ if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds);
return count;
}
@@ -755,14 +755,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
dst->master_ethtool_ops.get_strings(dev, stringset, data);
}
- if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
+ if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
ndata = data + mcount * len;
/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
* the output after to prepend our CPU port prefix we
* constructed earlier
*/
- ds->drv->get_strings(ds, cpu_port, ndata);
- count = ds->drv->get_sset_count(ds);
+ ds->ops->get_strings(ds, cpu_port, ndata);
+ count = ds->ops->get_sset_count(ds);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
@@ -782,8 +782,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
data[1] = dev->stats.tx_bytes;
data[2] = dev->stats.rx_packets;
data[3] = dev->stats.rx_bytes;
- if (ds->drv->get_ethtool_stats != NULL)
- ds->drv->get_ethtool_stats(ds, p->port, data + 4);
+ if (ds->ops->get_ethtool_stats)
+ ds->ops->get_ethtool_stats(ds, p->port, data + 4);
}
static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
@@ -795,8 +795,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
int count;
count = 4;
- if (ds->drv->get_sset_count != NULL)
- count += ds->drv->get_sset_count(ds);
+ if (ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds);
return count;
}
@@ -809,8 +809,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- if (ds->drv->get_wol)
- ds->drv->get_wol(ds, p->port, w);
+ if (ds->ops->get_wol)
+ ds->ops->get_wol(ds, p->port, w);
}
static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
@@ -819,8 +819,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
struct dsa_switch *ds = p->parent;
int ret = -EOPNOTSUPP;
- if (ds->drv->set_wol)
- ret = ds->drv->set_wol(ds, p->port, w);
+ if (ds->ops->set_wol)
+ ret = ds->ops->set_wol(ds, p->port, w);
return ret;
}
@@ -831,10 +831,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
struct dsa_switch *ds = p->parent;
int ret;
- if (!ds->drv->set_eee)
+ if (!ds->ops->set_eee)
return -EOPNOTSUPP;
- ret = ds->drv->set_eee(ds, p->port, p->phy, e);
+ ret = ds->ops->set_eee(ds, p->port, p->phy, e);
if (ret)
return ret;
@@ -850,10 +850,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
struct dsa_switch *ds = p->parent;
int ret;
- if (!ds->drv->get_eee)
+ if (!ds->ops->get_eee)
return -EOPNOTSUPP;
- ret = ds->drv->get_eee(ds, p->port, e);
+ ret = ds->ops->get_eee(ds, p->port, e);
if (ret)
return ret;
@@ -988,8 +988,8 @@ static void dsa_slave_adjust_link(struct net_device *dev)
p->old_pause = p->phy->pause;
}
- if (ds->drv->adjust_link && status_changed)
- ds->drv->adjust_link(ds, p->port, p->phy);
+ if (ds->ops->adjust_link && status_changed)
+ ds->ops->adjust_link(ds, p->port, p->phy);
if (status_changed)
phy_print_status(p->phy);
@@ -1004,8 +1004,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev,
if (dev) {
p = netdev_priv(dev);
ds = p->parent;
- if (ds->drv->fixed_link_update)
- ds->drv->fixed_link_update(ds, p->port, status);
+ if (ds->ops->fixed_link_update)
+ ds->ops->fixed_link_update(ds, p->port, status);
}
return 0;
@@ -1062,8 +1062,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
phy_dn = port_dn;
}
- if (ds->drv->get_phy_flags)
- phy_flags = ds->drv->get_phy_flags(ds, p->port);
+ if (ds->ops->get_phy_flags)
+ phy_flags = ds->ops->get_phy_flags(ds, p->port);
if (phy_dn) {
int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 55513e654d79..e94b47be0019 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog)
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
- /* Check special setups for testing purpose to enable TFO w/o
- * requiring TCP_FASTOPEN sockopt.
+ /* Enable TFO w/o requiring TCP_FASTOPEN socket option.
* Note that only TCP sockets (SOCK_STREAM) will reach here.
- * Also fastopenq may already been allocated because this
- * socket was in TCP_LISTEN state previously but was
- * shutdown() (rather than close()).
+ * Also fastopen backlog may already been set via the option
+ * because the socket was in TCP_LISTEN state previously but
+ * was shutdown() rather than close().
*/
- if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+ if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
- fastopen_queue_tune(sk, backlog);
- else if ((sysctl_tcp_fastopen &
- TFO_SERVER_WO_SOCKOPT2) != 0)
- fastopen_queue_tune(sk,
- ((uint)sysctl_tcp_fastopen) >> 16);
-
+ fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
}
+
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
@@ -921,6 +916,8 @@ const struct proto_ops inet_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
+ .read_sock = tcp_read_sock,
+ .peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ef2ebeb89d0f..317c31939732 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
return NULL;
switch (id) {
- case RT_TABLE_LOCAL:
- rcu_assign_pointer(net->ipv4.fib_local, tb);
- break;
case RT_TABLE_MAIN:
rcu_assign_pointer(net->ipv4.fib_main, tb);
break;
@@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old,
{
#ifdef CONFIG_IP_MULTIPLE_TABLES
switch (new->tb_id) {
- case RT_TABLE_LOCAL:
- rcu_assign_pointer(net->ipv4.fib_local, new);
- break;
case RT_TABLE_MAIN:
rcu_assign_pointer(net->ipv4.fib_main, new);
break;
@@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net)
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
- RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 539fa264e67d..8066ccc48a17 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1576,7 +1576,8 @@ static bool fib_good_nh(const struct fib_nh *nh)
rcu_read_lock_bh();
- n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
+ n = __ipv4_neigh_lookup_noref(nh->nh_dev,
+ (__force u32)nh->nh_gw);
if (n)
state = n->nud_state;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9b4ca87f70ba..606cc3e85d2b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
continue;
}
+ /* Based on RFC3376 5.1. Should not send source-list change
+ * records when there is a filter mode change.
+ */
+ if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) ||
+ (!gdeleted && pmc->crcount)) &&
+ (type == IGMPV3_ALLOW_NEW_SOURCES ||
+ type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+ goto decrease_sf_crcount;
+
/* clear marks on query responses */
if (isquery)
psf->sf_gsresp = 0;
@@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
scount++; stotal++;
if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
psf->sf_crcount--;
if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
if (psf_prev)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 38c2c47fe0e8..abfbe492ebfe 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -45,6 +45,7 @@ struct inet_diag_entry {
u16 family;
u16 userlocks;
u32 ifindex;
+ u32 mark;
};
static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -580,6 +581,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
yes = 0;
break;
}
+ case INET_DIAG_BC_MARK_COND: {
+ struct inet_diag_markcond *cond;
+
+ cond = (struct inet_diag_markcond *)(op + 1);
+ if ((entry->mark & cond->mask) != cond->mark)
+ yes = 0;
+ break;
+ }
}
if (yes) {
@@ -624,6 +633,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.dport = ntohs(inet->inet_dport);
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
+ if (sk_fullsock(sk))
+ entry.mark = sk->sk_mark;
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+ else
+ entry.mark = 0;
return inet_diag_bc_run(bc, &entry);
}
@@ -706,10 +721,25 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op,
return true;
}
-static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
+static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
+ int *min_len)
+{
+ *min_len += sizeof(struct inet_diag_markcond);
+ return len >= *min_len;
+}
+
+static int inet_diag_bc_audit(const struct nlattr *attr,
+ const struct sk_buff *skb)
{
- const void *bc = bytecode;
- int len = bytecode_len;
+ bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+ const void *bytecode, *bc;
+ int bytecode_len, len;
+
+ if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+ return -EINVAL;
+
+ bytecode = bc = nla_data(attr);
+ len = bytecode_len = nla_len(attr);
while (len > 0) {
int min_len = sizeof(struct inet_diag_bc_op);
@@ -732,6 +762,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
if (!valid_port_comparison(bc, len, &min_len))
return -EINVAL;
break;
+ case INET_DIAG_BC_MARK_COND:
+ if (!net_admin)
+ return -EPERM;
+ if (!valid_markcond(bc, len, &min_len))
+ return -EINVAL;
+ break;
case INET_DIAG_BC_AUTO:
case INET_DIAG_BC_JMP:
case INET_DIAG_BC_NOP:
@@ -1020,13 +1056,13 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (nlmsg_attrlen(nlh, hdrlen)) {
struct nlattr *attr;
+ int err;
attr = nlmsg_find_attr(nlh, hdrlen,
INET_DIAG_REQ_BYTECODE);
- if (!attr ||
- nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
- inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
- return -EINVAL;
+ err = inet_diag_bc_audit(attr, skb);
+ if (err)
+ return err;
}
{
struct netlink_dump_control c = {
@@ -1051,13 +1087,13 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
h->nlmsg_flags & NLM_F_DUMP) {
if (nlmsg_attrlen(h, hdrlen)) {
struct nlattr *attr;
+ int err;
attr = nlmsg_find_attr(h, hdrlen,
INET_DIAG_REQ_BYTECODE);
- if (!attr ||
- nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
- inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
- return -EINVAL;
+ err = inet_diag_bc_audit(attr, skb);
+ if (err)
+ return err;
}
{
struct netlink_dump_control c = {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1d71c40eaaf3..071a785c65eb 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -85,7 +85,6 @@
/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */
#define CONF_SEND_RETRIES 6 /* Send six requests per open */
-#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */
#define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */
#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */
#define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */
@@ -188,7 +187,7 @@ struct ic_device {
};
static struct ic_device *ic_first_dev __initdata; /* List of open device */
-static struct net_device *ic_dev __initdata; /* Selected device */
+static struct ic_device *ic_dev __initdata; /* Selected device */
static bool __init ic_is_init_dev(struct net_device *dev)
{
@@ -307,7 +306,7 @@ static void __init ic_close_devs(void)
while ((d = next)) {
next = d->next;
dev = d->dev;
- if (dev != ic_dev && !netdev_uses_dsa(dev)) {
+ if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags);
}
@@ -372,7 +371,7 @@ static int __init ic_setup_if(void)
int err;
memset(&ir, 0, sizeof(ir));
- strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+ strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
set_sockaddr(sin, ic_myaddr, 0);
if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface address (%d)\n",
@@ -396,7 +395,7 @@ static int __init ic_setup_if(void)
* out, we'll try to muddle along.
*/
if (ic_dev_mtu != 0) {
- strcpy(ir.ifr_name, ic_dev->name);
+ strcpy(ir.ifr_name, ic_dev->dev->name);
ir.ifr_mtu = ic_dev_mtu;
if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
@@ -568,7 +567,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop_unlock;
/* We have a winner! */
- ic_dev = dev;
+ ic_dev = d;
if (ic_myaddr == NONE)
ic_myaddr = tip;
ic_servaddr = sip;
@@ -655,8 +654,6 @@ static struct packet_type bootp_packet_type __initdata = {
.func = ic_bootp_recv,
};
-static __be32 ic_dev_xid; /* Device under configuration */
-
/*
* Initialize DHCP/BOOTP extension fields in the request.
*/
@@ -666,14 +663,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 };
#ifdef IPCONFIG_DHCP
static void __init
-ic_dhcp_init_options(u8 *options)
+ic_dhcp_init_options(u8 *options, struct ic_device *d)
{
u8 mt = ((ic_servaddr == NONE)
? DHCPDISCOVER : DHCPREQUEST);
u8 *e = options;
int len;
- pr_debug("DHCP: Sending message type %d\n", mt);
+ pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name);
memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */
e += 4;
@@ -857,7 +854,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
/* add DHCP options or BOOTP extensions */
#ifdef IPCONFIG_DHCP
if (ic_proto_enabled & IC_USE_DHCP)
- ic_dhcp_init_options(b->exten);
+ ic_dhcp_init_options(b->exten, d);
else
#endif
ic_bootp_init_ext(b->exten);
@@ -1033,14 +1030,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
/* Is it a reply to our BOOTP request? */
if (b->op != BOOTP_REPLY ||
b->xid != d->xid) {
- net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
- b->op, b->xid);
- goto drop_unlock;
- }
-
- /* Is it a reply for the device we are configuring? */
- if (b->xid != ic_dev_xid) {
- net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n");
+ net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n",
+ d->dev->name, b->op, b->xid);
goto drop_unlock;
}
@@ -1075,7 +1066,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
}
}
- pr_debug("DHCP: Got message type %d\n", mt);
+ pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name);
switch (mt) {
case DHCPOFFER:
@@ -1130,7 +1121,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
}
/* We have a winner! */
- ic_dev = dev;
+ ic_dev = d;
ic_myaddr = b->your_ip;
ic_servaddr = b->server_ip;
ic_addrservaddr = b->iph.saddr;
@@ -1225,9 +1216,6 @@ static int __init ic_dynamic(void)
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
for (;;) {
#ifdef IPCONFIG_BOOTP
- /* Track the device we are configuring */
- ic_dev_xid = d->xid;
-
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
#endif
@@ -1236,15 +1224,19 @@ static int __init ic_dynamic(void)
ic_rarp_send_if(d);
#endif
- jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
- while (time_before(jiffies, jiff) && !ic_got_reply)
- schedule_timeout_uninterruptible(1);
+ if (!d->next) {
+ jiff = jiffies + timeout;
+ while (time_before(jiffies, jiff) && !ic_got_reply)
+ schedule_timeout_uninterruptible(1);
+ }
#ifdef IPCONFIG_DHCP
/* DHCP isn't done until we get a DHCPACK. */
if ((ic_got_reply & IC_BOOTP) &&
(ic_proto_enabled & IC_USE_DHCP) &&
ic_dhcp_msgtype != DHCPACK) {
ic_got_reply = 0;
+ /* continue on device that got the reply */
+ d = ic_dev;
pr_cont(",");
continue;
}
@@ -1487,7 +1479,7 @@ static int __init ip_auto_config(void)
#endif /* IPCONFIG_DYNAMIC */
} else {
/* Device selected manually or only one device -> use it */
- ic_dev = ic_first_dev->dev;
+ ic_dev = ic_first_dev;
}
addr = root_nfs_parse_addr(root_server_path);
@@ -1501,14 +1493,6 @@ static int __init ip_auto_config(void)
return -1;
/*
- * Close all network devices except the device we've
- * autoconfigured and set up routes.
- */
- ic_close_devs();
- if (ic_setup_if() < 0 || ic_setup_routes() < 0)
- return -1;
-
- /*
* Record which protocol was actually used.
*/
#ifdef IPCONFIG_DYNAMIC
@@ -1522,7 +1506,7 @@ static int __init ip_auto_config(void)
pr_info("IP-Config: Complete:\n");
pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
- ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
+ ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr,
&ic_myaddr, &ic_netmask, &ic_gateway);
pr_info(" host=%s, domain=%s, nis-domain=%s\n",
utsname()->nodename, ic_domain, utsname()->domainname);
@@ -1542,7 +1526,18 @@ static int __init ip_auto_config(void)
pr_cont("\n");
#endif /* !SILENT */
- return 0;
+ /*
+ * Close all network devices except the device we've
+ * autoconfigured and set up routes.
+ */
+ if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+ err = -1;
+ else
+ err = 0;
+
+ ic_close_devs();
+
+ return err;
}
late_initcall(ip_auto_config);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9f665b63a927..1ed015e4bc79 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+ SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE),
SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ffbb218de520..77311a92275c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1570,6 +1570,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_peek_len(struct socket *sock)
+{
+ return tcp_inq(sock->sk);
+}
+EXPORT_SYMBOL(tcp_peek_len);
+
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -3092,23 +3098,6 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);
-int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
- const struct tcphdr *th)
-{
- struct scatterlist sg;
- struct tcphdr hdr;
-
- /* We are not allowed to change tcphdr, make a local copy */
- memcpy(&hdr, th, sizeof(hdr));
- hdr.check = 0;
-
- /* options aren't included in the hash */
- sg_init_one(&sg, &hdr, sizeof(hdr));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
- return crypto_ahash_update(hp->md5_req);
-}
-EXPORT_SYMBOL(tcp_md5_hash_header);
-
int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
const struct sk_buff *skb, unsigned int header_len)
{
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ebf45b38bc3..8cd02c0b056c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4392,12 +4392,9 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
if (tcp_prune_queue(sk) < 0)
return -1;
- if (!sk_rmem_schedule(sk, skb, size)) {
+ while (!sk_rmem_schedule(sk, skb, size)) {
if (!tcp_prune_ofo_queue(sk))
return -1;
-
- if (!sk_rmem_schedule(sk, skb, size))
- return -1;
}
}
return 0;
@@ -4874,29 +4871,41 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
}
/*
- * Purge the out-of-order queue.
- * Return true if queue was pruned.
+ * Clean the out-of-order queue to make room.
+ * We drop high sequences packets to :
+ * 1) Let a chance for holes to be filled.
+ * 2) not add too big latencies if thousands of packets sit there.
+ * (But if application shrinks SO_RCVBUF, we could still end up
+ * freeing whole queue here)
+ *
+ * Return true if queue has shrunk.
*/
static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool res = false;
+ struct sk_buff *skb;
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
- __skb_queue_purge(&tp->out_of_order_queue);
+ if (skb_queue_empty(&tp->out_of_order_queue))
+ return false;
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tp->rx_opt.sack_ok)
- tcp_sack_reset(&tp->rx_opt);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+
+ while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) {
+ tcp_drop(sk, skb);
sk_mem_reclaim(sk);
- res = true;
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !tcp_under_memory_pressure(sk))
+ break;
}
- return res;
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ return true;
}
/* Reduce allocated memory if we can, trying to get
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7158d4f8dae4..a75bf48d7950 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1175,6 +1175,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
&iph->saddr, ntohs(th->source),
&iph->daddr, ntohs(th->dest),
@@ -1537,6 +1538,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_prequeue);
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+ u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+
+ /* Only socket owner can try to collapse/prune rx queues
+ * to reduce memory overhead, so add a little headroom here.
+ * Few sockets backlog are possibly concurrently non empty.
+ */
+ limit += 64*1024;
+
+ /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
+ * we can fix skb->truesize to its real value to avoid future drops.
+ * This is valid because skb is not yet charged to the socket.
+ * It has been noticed pure SACK packets were sometimes dropped
+ * (if cooked by drivers without copybreak feature).
+ */
+ if (!skb->data_len)
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+
+ if (unlikely(sk_add_backlog(sk, skb, limit))) {
+ bh_unlock_sock(sk);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(tcp_add_backlog);
+
/*
* From tcp_input.c
*/
@@ -1608,6 +1637,7 @@ process:
sk = req->rsk_listener;
if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
}
@@ -1666,10 +1696,7 @@ process:
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
- } else if (unlikely(sk_add_backlog(sk, skb,
- sk->sk_rcvbuf + sk->sk_sndbuf))) {
- bh_unlock_sock(sk);
- __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+ } else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
bh_unlock_sock(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdaef7fd6e47..8b45794eb6b2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2776,7 +2776,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
tcp_for_write_queue_from(skb, sk) {
- __u8 sacked = TCP_SKB_CB(skb)->sacked;
+ __u8 sacked;
int segs;
if (skb == tcp_send_head(sk))
@@ -2788,6 +2788,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
if (segs <= 0)
return;
+ sacked = TCP_SKB_CB(skb)->sacked;
/* In case tcp_shift_skb_data() have aggregated large skbs,
* we need to make sure not sending too bigs TSO packets
*/
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5fdcb8d108d4..058c31286ce1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
#include <net/busy_poll.h>
#include "udp_impl.h"
#include <net/sock_reuseport.h>
+#include <net/addrconf.h>
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -2192,6 +2193,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
EXPORT_SYMBOL(udp_poll);
+int udp_abort(struct sock *sk, int err)
+{
+ lock_sock(sk);
+
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ udp_disconnect(sk, 0);
+
+ release_sock(sk);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_abort);
+
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
@@ -2221,7 +2236,7 @@ struct proto udp_prot = {
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 3d5ccf4b1412..8a9f6e535caa 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -165,12 +165,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_wqueue = sk_wmem_alloc_get(sk);
}
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int __udp_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *req,
+ struct udp_table *tbl)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sock *sk;
+ int err;
+
+ rcu_read_lock();
+
+ if (req->sdiag_family == AF_INET)
+ sk = __udp4_lib_lookup(net,
+ req->id.idiag_dst[0], req->id.idiag_dport,
+ req->id.idiag_src[0], req->id.idiag_sport,
+ req->id.idiag_if, tbl, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (req->sdiag_family == AF_INET6) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+ ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+ sk = __udp4_lib_lookup(net,
+ req->id.idiag_dst[0], req->id.idiag_dport,
+ req->id.idiag_src[0], req->id.idiag_sport,
+ req->id.idiag_if, tbl, NULL);
+
+ else
+ sk = __udp6_lib_lookup(net,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ req->id.idiag_if, tbl, NULL);
+ }
+#endif
+ else {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+
+ rcu_read_unlock();
+
+ if (!sk)
+ return -ENOENT;
+
+ if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+ sock_put(sk);
+ return -ENOENT;
+ }
+
+ err = sock_diag_destroy(sk, ECONNABORTED);
+
+ sock_put(sk);
+
+ return err;
+}
+
+static int udp_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *req)
+{
+ return __udp_diag_destroy(in_skb, req, &udp_table);
+}
+
+static int udplite_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *req)
+{
+ return __udp_diag_destroy(in_skb, req, &udplite_table);
+}
+
+#endif
+
static const struct inet_diag_handler udp_diag_handler = {
.dump = udp_diag_dump,
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDP,
.idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+ .destroy = udp_diag_destroy,
+#endif
};
static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -192,6 +268,9 @@ static const struct inet_diag_handler udplite_diag_handler = {
.idiag_get_info = udp_diag_get_info,
.idiag_type = IPPROTO_UDPLITE,
.idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+ .destroy = udplite_diag_destroy,
+#endif
};
static int __init udp_diag_init(void)
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2eea073e27ef..af817158d830 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -60,7 +60,6 @@ struct proto udplite_prot = {
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
};
EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b454055ba625..46ad699937fd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
+ .read_sock = tcp_read_sock,
+ .peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index ec9efbcdad35..aba0998ddbfb 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -172,6 +172,5 @@ static void __exit ila_fini(void)
module_init(ila_init);
module_exit(ila_fini);
-MODULE_ALIAS_RTNL_LWT(ILA);
MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 704274cbd495..397e1ed3daa3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -61,12 +61,12 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT 5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
static int ip6gre_net_id __read_mostly;
struct ip6gre_net {
- struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+ struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct net_device *fb_tunnel_dev;
};
@@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
will match fallback tunnel.
*/
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
static u32 HASH_ADDR(const struct in6_addr *addr)
{
u32 hash = ipv6_addr_hash(addr);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
}
#define tunnels_r_l tunnels[3]
@@ -1087,7 +1087,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
struct ip6_tnl *t;
t = rtnl_dereference(ign->tunnels[prio][h]);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 7b0481e3738f..2050217df565 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -64,8 +64,8 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_TUNNEL_HASH_SIZE_SHIFT 5
+#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
@@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
}
static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,7 +87,7 @@ struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
@@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
if (dev->rtnl_link_ops == &ip6_link_ops)
unregister_netdevice_queue(dev, &list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
/* If dev is in the same netns, it has already
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f14040..cc7e05898307 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -50,14 +50,14 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT 5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
}
static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@ struct vti6_net {
/* the vti6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
@@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
struct ip6_tnl *t;
LIST_HEAD(list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, &list);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d64ee7e83664..75c1fc54f188 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
continue;
}
+ /* Based on RFC3810 6.1. Should not send source-list change
+ * records when there is a filter mode change.
+ */
+ if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) ||
+ (!gdeleted && pmc->mca_crcount)) &&
+ (type == MLD2_ALLOW_NEW_SOURCES ||
+ type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+ goto decrease_sf_crcount;
+
/* clear marks on query responses */
if (isquery)
psf->sf_gsresp = 0;
@@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
scount++; stotal++;
if ((type == MLD2_ALLOW_NEW_SOURCES ||
type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
psf->sf_crcount--;
if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
if (psf_prev)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 182b6a9be29d..b1cdf8009d29 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -62,7 +62,7 @@
For comments look at net/ipv4/ip_gre.c --ANK
*/
-#define HASH_SIZE 16
+#define IP6_SIT_HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static bool log_ecn_error = true;
@@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
- struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
struct ip_tunnel __rcu *tunnels_wc[1];
struct ip_tunnel __rcu **tunnels[4];
@@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
-bool ipip6_valid_ip_proto(u8 ipproto)
+static bool ipip6_valid_ip_proto(u8 ipproto)
{
return ipproto == IPPROTO_IPV6 ||
ipproto == IPPROTO_IPIP ||
@@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
for (prio = 1; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
struct ip_tunnel *t;
t = rtnl_dereference(sitn->tunnels[prio][h]);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 94f4f89d73e7..04529a3d42cb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
genhash ? "failed" : "mismatch",
&ip6h->saddr, ntohs(th->source),
@@ -1415,6 +1416,7 @@ process:
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
}
@@ -1471,10 +1473,7 @@ process:
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
ret = tcp_v6_do_rcv(sk, skb);
- } else if (unlikely(sk_add_backlog(sk, skb,
- sk->sk_rcvbuf + sk->sk_sndbuf))) {
- bh_unlock_sock(sk);
- __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+ } else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1868,17 +1867,6 @@ void tcp6_proc_exit(struct net *net)
}
#endif
-static void tcp_v6_clear_sk(struct sock *sk, int size)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- /* we do not want to clear pinet6 field, because of RCU lookups */
- sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
-
- size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
- memset(&inet->pinet6 + 1, 0, size);
-}
-
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -1920,7 +1908,6 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
- .clear_sk = tcp_v6_clear_sk,
.diag_destroy = tcp_abort,
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 19ac3a1c308d..9aa7c1c7a9ce 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net)
}
#endif /* CONFIG_PROC_FS */
-void udp_v6_clear_sk(struct sock *sk, int size)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- /* we do not want to clear pinet6 field, because of RCU lookups */
- sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
-
- size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
- memset(&inet->pinet6 + 1, 0, size);
-}
-
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
@@ -1465,7 +1454,7 @@ struct proto udpv6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = udp_v6_clear_sk,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 0682c031ccdc..f6eb1ab34f4b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
-void udp_v6_clear_sk(struct sock *sk, int size);
-
#ifdef CONFIG_PROC_FS
int udp6_seq_show(struct seq_file *seq, void *v);
#endif
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index fd6ef414899b..47d0d2b87106 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -55,7 +55,6 @@ struct proto udplitev6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udplite6_protosw = {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 8d2f7c9b491d..db639690c205 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -845,9 +845,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
if (sock->state != SS_UNCONNECTED)
goto out;
- if ((sk = sock->sk) == NULL)
- goto out;
-
err = -EOPNOTSUPP;
if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
(sk->sk_type != SOCK_DGRAM))
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index 5db94d940ecc..87fca36e6c47 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -3,6 +3,7 @@ config AF_KCM
tristate "KCM sockets"
depends on INET
select BPF_SYSCALL
+ select STREAM_PARSER
---help---
KCM (Kernel Connection Multiplexor) sockets provide a method
for multiplexing messages of a message based application
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 16c2e03bd388..bf75c9231cca 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
seq_printf(seq,
" psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
psock->index,
- psock->stats.rx_msgs,
- psock->stats.rx_bytes,
+ psock->strp.stats.rx_msgs,
+ psock->strp.stats.rx_bytes,
psock->stats.tx_msgs,
psock->stats.tx_bytes,
psock->sk->sk_receive_queue.qlen,
@@ -170,14 +170,27 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
if (psock->tx_stopped)
seq_puts(seq, "TxStop ");
- if (psock->rx_stopped)
+ if (psock->strp.rx_stopped)
seq_puts(seq, "RxStop ");
if (psock->tx_kcm)
seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
- if (psock->ready_rx_msg)
- seq_puts(seq, "RdyRx ");
+ if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+ if (psock->sk->sk_receive_queue.qlen) {
+ if (psock->strp.rx_need_bytes)
+ seq_printf(seq, "RxWait=%u ",
+ psock->strp.rx_need_bytes);
+ else
+ seq_printf(seq, "RxWait ");
+ }
+ } else {
+ if (psock->strp.rx_paused)
+ seq_puts(seq, "RxPause ");
+
+ if (psock->ready_rx_msg)
+ seq_puts(seq, "RdyRx ");
+ }
seq_puts(seq, "\n");
}
@@ -275,6 +288,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
{
struct kcm_psock_stats psock_stats;
struct kcm_mux_stats mux_stats;
+ struct strp_aggr_stats strp_stats;
struct kcm_mux *mux;
struct kcm_psock *psock;
struct net *net = seq->private;
@@ -282,20 +296,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
memset(&mux_stats, 0, sizeof(mux_stats));
memset(&psock_stats, 0, sizeof(psock_stats));
+ memset(&strp_stats, 0, sizeof(strp_stats));
mutex_lock(&knet->mutex);
aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
aggregate_psock_stats(&knet->aggregate_psock_stats,
&psock_stats);
+ aggregate_strp_stats(&knet->aggregate_strp_stats,
+ &strp_stats);
list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
spin_lock_bh(&mux->lock);
aggregate_mux_stats(&mux->stats, &mux_stats);
aggregate_psock_stats(&mux->aggregate_psock_stats,
&psock_stats);
- list_for_each_entry(psock, &mux->psocks, psock_list)
+ aggregate_strp_stats(&mux->aggregate_strp_stats,
+ &strp_stats);
+ list_for_each_entry(psock, &mux->psocks, psock_list) {
aggregate_psock_stats(&psock->stats, &psock_stats);
+ save_strp_stats(&psock->strp, &strp_stats);
+ }
+
spin_unlock_bh(&mux->lock);
}
@@ -328,7 +350,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
mux_stats.rx_ready_drops);
seq_printf(seq,
- "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
+ "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
"Psock",
"RX-Msgs",
"RX-Bytes",
@@ -337,6 +359,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
"Reserved",
"Unreserved",
"RX-Aborts",
+ "RX-Intr",
+ "RX-Unrecov",
"RX-MemFail",
"RX-NeedMor",
"RX-BadLen",
@@ -345,20 +369,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
"TX-Aborts");
seq_printf(seq,
- "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
+ "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
"",
- psock_stats.rx_msgs,
- psock_stats.rx_bytes,
+ strp_stats.rx_msgs,
+ strp_stats.rx_bytes,
psock_stats.tx_msgs,
psock_stats.tx_bytes,
psock_stats.reserved,
psock_stats.unreserved,
- psock_stats.rx_aborts,
- psock_stats.rx_mem_fail,
- psock_stats.rx_need_more_hdr,
- psock_stats.rx_bad_hdr_len,
- psock_stats.rx_msg_too_big,
- psock_stats.rx_msg_timeouts,
+ strp_stats.rx_aborts,
+ strp_stats.rx_interrupted,
+ strp_stats.rx_unrecov_intr,
+ strp_stats.rx_mem_fail,
+ strp_stats.rx_need_more_hdr,
+ strp_stats.rx_bad_hdr_len,
+ strp_stats.rx_msg_too_big,
+ strp_stats.rx_msg_timeouts,
psock_stats.tx_aborts);
return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cb39e05b166c..2632ac748371 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1,3 +1,13 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
#include <linux/bpf.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
@@ -16,7 +26,6 @@
#include <net/kcm.h>
#include <net/netns/generic.h>
#include <net/sock.h>
-#include <net/tcp.h>
#include <uapi/linux/kcm.h>
unsigned int kcm_net_id;
@@ -35,38 +44,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
return (struct kcm_tx_msg *)skb->cb;
}
-static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
-{
- return (struct kcm_rx_msg *)((void *)skb->cb +
- offsetof(struct qdisc_skb_cb, data));
-}
-
static void report_csk_error(struct sock *csk, int err)
{
csk->sk_err = EPIPE;
csk->sk_error_report(csk);
}
-/* Callback lock held */
-static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
- struct sk_buff *skb)
-{
- struct sock *csk = psock->sk;
-
- /* Unrecoverable error in receive */
-
- del_timer(&psock->rx_msg_timer);
-
- if (psock->rx_stopped)
- return;
-
- psock->rx_stopped = 1;
- KCM_STATS_INCR(psock->stats.rx_aborts);
-
- /* Report an error on the lower socket */
- report_csk_error(csk, err);
-}
-
static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
bool wakeup_kcm)
{
@@ -109,12 +92,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
struct kcm_psock *psock)
{
- KCM_STATS_ADD(mux->stats.rx_bytes,
- psock->stats.rx_bytes - psock->saved_rx_bytes);
+ STRP_STATS_ADD(mux->stats.rx_bytes,
+ psock->strp.stats.rx_bytes -
+ psock->saved_rx_bytes);
mux->stats.rx_msgs +=
- psock->stats.rx_msgs - psock->saved_rx_msgs;
- psock->saved_rx_msgs = psock->stats.rx_msgs;
- psock->saved_rx_bytes = psock->stats.rx_bytes;
+ psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
+ psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
+ psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
}
static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -167,11 +151,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm)
*/
list_del(&psock->psock_ready_list);
psock->ready_rx_msg = NULL;
-
/* Commit clearing of ready_rx_msg for queuing work */
smp_mb();
- queue_work(kcm_wq, &psock->rx_work);
+ strp_unpause(&psock->strp);
+ strp_check_rcv(&psock->strp);
}
/* Buffer limit is okay now, add to ready list */
@@ -285,6 +269,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
if (list_empty(&mux->kcm_rx_waiters)) {
psock->ready_rx_msg = head;
+ strp_pause(&psock->strp);
list_add_tail(&psock->psock_ready_list,
&mux->psocks_ready);
spin_unlock_bh(&mux->rx_lock);
@@ -353,346 +338,60 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
spin_unlock_bh(&mux->rx_lock);
}
-static void kcm_start_rx_timer(struct kcm_psock *psock)
-{
- if (psock->sk->sk_rcvtimeo)
- mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
-}
-
-/* Macro to invoke filter function. */
-#define KCM_RUN_FILTER(prog, ctx) \
- (*prog->bpf_func)(ctx, prog->insnsi)
-
-/* Lower socket lock held */
-static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
- unsigned int orig_offset, size_t orig_len)
-{
- struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
- struct kcm_rx_msg *rxm;
- struct kcm_sock *kcm;
- struct sk_buff *head, *skb;
- size_t eaten = 0, cand_len;
- ssize_t extra;
- int err;
- bool cloned_orig = false;
-
- if (psock->ready_rx_msg)
- return 0;
-
- head = psock->rx_skb_head;
- if (head) {
- /* Message already in progress */
-
- rxm = kcm_rx_msg(head);
- if (unlikely(rxm->early_eaten)) {
- /* Already some number of bytes on the receive sock
- * data saved in rx_skb_head, just indicate they
- * are consumed.
- */
- eaten = orig_len <= rxm->early_eaten ?
- orig_len : rxm->early_eaten;
- rxm->early_eaten -= eaten;
-
- return eaten;
- }
-
- if (unlikely(orig_offset)) {
- /* Getting data with a non-zero offset when a message is
- * in progress is not expected. If it does happen, we
- * need to clone and pull since we can't deal with
- * offsets in the skbs for a message expect in the head.
- */
- orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!orig_skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- return 0;
- }
- if (!pskb_pull(orig_skb, orig_offset)) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- kfree_skb(orig_skb);
- desc->error = -ENOMEM;
- return 0;
- }
- cloned_orig = true;
- orig_offset = 0;
- }
-
- if (!psock->rx_skb_nextp) {
- /* We are going to append to the frags_list of head.
- * Need to unshare the frag_list.
- */
- err = skb_unclone(head, GFP_ATOMIC);
- if (err) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = err;
- return 0;
- }
-
- if (unlikely(skb_shinfo(head)->frag_list)) {
- /* We can't append to an sk_buff that already
- * has a frag_list. We create a new head, point
- * the frag_list of that to the old head, and
- * then are able to use the old head->next for
- * appending to the message.
- */
- if (WARN_ON(head->next)) {
- desc->error = -EINVAL;
- return 0;
- }
-
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- return 0;
- }
- skb->len = head->len;
- skb->data_len = head->len;
- skb->truesize = head->truesize;
- *kcm_rx_msg(skb) = *kcm_rx_msg(head);
- psock->rx_skb_nextp = &head->next;
- skb_shinfo(skb)->frag_list = head;
- psock->rx_skb_head = skb;
- head = skb;
- } else {
- psock->rx_skb_nextp =
- &skb_shinfo(head)->frag_list;
- }
- }
- }
-
- while (eaten < orig_len) {
- /* Always clone since we will consume something */
- skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- break;
- }
-
- cand_len = orig_len - eaten;
-
- head = psock->rx_skb_head;
- if (!head) {
- head = skb;
- psock->rx_skb_head = head;
- /* Will set rx_skb_nextp on next packet if needed */
- psock->rx_skb_nextp = NULL;
- rxm = kcm_rx_msg(head);
- memset(rxm, 0, sizeof(*rxm));
- rxm->offset = orig_offset + eaten;
- } else {
- /* Unclone since we may be appending to an skb that we
- * already share a frag_list with.
- */
- err = skb_unclone(skb, GFP_ATOMIC);
- if (err) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = err;
- break;
- }
-
- rxm = kcm_rx_msg(head);
- *psock->rx_skb_nextp = skb;
- psock->rx_skb_nextp = &skb->next;
- head->data_len += skb->len;
- head->len += skb->len;
- head->truesize += skb->truesize;
- }
-
- if (!rxm->full_len) {
- ssize_t len;
-
- len = KCM_RUN_FILTER(psock->bpf_prog, head);
-
- if (!len) {
- /* Need more header to determine length */
- if (!rxm->accum_len) {
- /* Start RX timer for new message */
- kcm_start_rx_timer(psock);
- }
- rxm->accum_len += cand_len;
- eaten += cand_len;
- KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
- WARN_ON(eaten != orig_len);
- break;
- } else if (len > psock->sk->sk_rcvbuf) {
- /* Message length exceeds maximum allowed */
- KCM_STATS_INCR(psock->stats.rx_msg_too_big);
- desc->error = -EMSGSIZE;
- psock->rx_skb_head = NULL;
- kcm_abort_rx_psock(psock, EMSGSIZE, head);
- break;
- } else if (len <= (ssize_t)head->len -
- skb->len - rxm->offset) {
- /* Length must be into new skb (and also
- * greater than zero)
- */
- KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
- desc->error = -EPROTO;
- psock->rx_skb_head = NULL;
- kcm_abort_rx_psock(psock, EPROTO, head);
- break;
- }
-
- rxm->full_len = len;
- }
-
- extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
-
- if (extra < 0) {
- /* Message not complete yet. */
- if (rxm->full_len - rxm->accum_len >
- tcp_inq(psock->sk)) {
- /* Don't have the whole messages in the socket
- * buffer. Set psock->rx_need_bytes to wait for
- * the rest of the message. Also, set "early
- * eaten" since we've already buffered the skb
- * but don't consume yet per tcp_read_sock.
- */
-
- if (!rxm->accum_len) {
- /* Start RX timer for new message */
- kcm_start_rx_timer(psock);
- }
-
- psock->rx_need_bytes = rxm->full_len -
- rxm->accum_len;
- rxm->accum_len += cand_len;
- rxm->early_eaten = cand_len;
- KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
- desc->count = 0; /* Stop reading socket */
- break;
- }
- rxm->accum_len += cand_len;
- eaten += cand_len;
- WARN_ON(eaten != orig_len);
- break;
- }
-
- /* Positive extra indicates ore bytes than needed for the
- * message
- */
-
- WARN_ON(extra > cand_len);
-
- eaten += (cand_len - extra);
-
- /* Hurray, we have a new message! */
- del_timer(&psock->rx_msg_timer);
- psock->rx_skb_head = NULL;
- KCM_STATS_INCR(psock->stats.rx_msgs);
-
-try_queue:
- kcm = reserve_rx_kcm(psock, head);
- if (!kcm) {
- /* Unable to reserve a KCM, message is held in psock. */
- break;
- }
-
- if (kcm_queue_rcv_skb(&kcm->sk, head)) {
- /* Should mean socket buffer full */
- unreserve_rx_kcm(psock, false);
- goto try_queue;
- }
- }
-
- if (cloned_orig)
- kfree_skb(orig_skb);
-
- KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
-
- return eaten;
-}
-
-/* Called with lock held on lower socket */
-static int psock_tcp_read_sock(struct kcm_psock *psock)
-{
- read_descriptor_t desc;
-
- desc.arg.data = psock;
- desc.error = 0;
- desc.count = 1; /* give more than one skb per call */
-
- /* sk should be locked here, so okay to do tcp_read_sock */
- tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
-
- unreserve_rx_kcm(psock, true);
-
- return desc.error;
-}
-
/* Lower sock lock held */
-static void psock_tcp_data_ready(struct sock *sk)
+static void psock_data_ready(struct sock *sk)
{
struct kcm_psock *psock;
read_lock_bh(&sk->sk_callback_lock);
psock = (struct kcm_psock *)sk->sk_user_data;
- if (unlikely(!psock || psock->rx_stopped))
- goto out;
+ if (likely(psock))
+ strp_data_ready(&psock->strp);
- if (psock->ready_rx_msg)
- goto out;
-
- if (psock->rx_need_bytes) {
- if (tcp_inq(sk) >= psock->rx_need_bytes)
- psock->rx_need_bytes = 0;
- else
- goto out;
- }
-
- if (psock_tcp_read_sock(psock) == -ENOMEM)
- queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
-
-out:
read_unlock_bh(&sk->sk_callback_lock);
}
-static void do_psock_rx_work(struct kcm_psock *psock)
+/* Called with lower sock held */
+static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
{
- read_descriptor_t rd_desc;
- struct sock *csk = psock->sk;
-
- /* We need the read lock to synchronize with psock_tcp_data_ready. We
- * need the socket lock for calling tcp_read_sock.
- */
- lock_sock(csk);
- read_lock_bh(&csk->sk_callback_lock);
-
- if (unlikely(csk->sk_user_data != psock))
- goto out;
-
- if (unlikely(psock->rx_stopped))
- goto out;
-
- if (psock->ready_rx_msg)
- goto out;
-
- rd_desc.arg.data = psock;
+ struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+ struct kcm_sock *kcm;
- if (psock_tcp_read_sock(psock) == -ENOMEM)
- queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
+try_queue:
+ kcm = reserve_rx_kcm(psock, skb);
+ if (!kcm) {
+ /* Unable to reserve a KCM, message is held in psock and strp
+ * is paused.
+ */
+ return;
+ }
-out:
- read_unlock_bh(&csk->sk_callback_lock);
- release_sock(csk);
+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+ /* Should mean socket buffer full */
+ unreserve_rx_kcm(psock, false);
+ goto try_queue;
+ }
}
-static void psock_rx_work(struct work_struct *w)
+static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
{
- do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
+ struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+ struct bpf_prog *prog = psock->bpf_prog;
+
+ return (*prog->bpf_func)(skb, prog->insnsi);
}
-static void psock_rx_delayed_work(struct work_struct *w)
+static int kcm_read_sock_done(struct strparser *strp, int err)
{
- do_psock_rx_work(container_of(w, struct kcm_psock,
- rx_delayed_work.work));
+ struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+
+ unreserve_rx_kcm(psock, true);
+
+ return err;
}
-static void psock_tcp_state_change(struct sock *sk)
+static void psock_state_change(struct sock *sk)
{
/* TCP only does a POLLIN for a half close. Do a POLLHUP here
* since application will normally not poll with POLLIN
@@ -702,7 +401,7 @@ static void psock_tcp_state_change(struct sock *sk)
report_csk_error(sk, EPIPE);
}
-static void psock_tcp_write_space(struct sock *sk)
+static void psock_write_space(struct sock *sk)
{
struct kcm_psock *psock;
struct kcm_mux *mux;
@@ -713,14 +412,13 @@ static void psock_tcp_write_space(struct sock *sk)
psock = (struct kcm_psock *)sk->sk_user_data;
if (unlikely(!psock))
goto out;
-
mux = psock->mux;
spin_lock_bh(&mux->lock);
/* Check if the socket is reserved so someone is waiting for sending. */
kcm = psock->tx_kcm;
- if (kcm)
+ if (kcm && !unlikely(kcm->tx_stopped))
queue_work(kcm_wq, &kcm->tx_work);
spin_unlock_bh(&mux->lock);
@@ -1411,7 +1109,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0;
long timeo;
- struct kcm_rx_msg *rxm;
+ struct strp_rx_msg *rxm;
int copied = 0;
struct sk_buff *skb;
@@ -1425,7 +1123,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
/* Okay, have a message on the receive queue */
- rxm = kcm_rx_msg(skb);
+ rxm = strp_rx_msg(skb);
if (len > rxm->full_len)
len = rxm->full_len;
@@ -1481,7 +1179,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
long timeo;
- struct kcm_rx_msg *rxm;
+ struct strp_rx_msg *rxm;
int err = 0;
ssize_t copied;
struct sk_buff *skb;
@@ -1498,7 +1196,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
/* Okay, have a message on the receive queue */
- rxm = kcm_rx_msg(skb);
+ rxm = strp_rx_msg(skb);
if (len > rxm->full_len)
len = rxm->full_len;
@@ -1674,15 +1372,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
spin_unlock_bh(&mux->rx_lock);
}
-static void kcm_rx_msg_timeout(unsigned long arg)
-{
- struct kcm_psock *psock = (struct kcm_psock *)arg;
-
- /* Message assembly timed out */
- KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
- kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
-}
-
static int kcm_attach(struct socket *sock, struct socket *csock,
struct bpf_prog *prog)
{
@@ -1692,19 +1381,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
struct kcm_psock *psock = NULL, *tpsock;
struct list_head *head;
int index = 0;
-
- if (csock->ops->family != PF_INET &&
- csock->ops->family != PF_INET6)
- return -EINVAL;
+ struct strp_callbacks cb;
+ int err;
csk = csock->sk;
if (!csk)
return -EINVAL;
- /* Only support TCP for now */
- if (csk->sk_protocol != IPPROTO_TCP)
- return -EINVAL;
-
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
if (!psock)
return -ENOMEM;
@@ -1713,11 +1396,16 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->sk = csk;
psock->bpf_prog = prog;
- setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
- (unsigned long)psock);
+ cb.rcv_msg = kcm_rcv_strparser;
+ cb.abort_parser = NULL;
+ cb.parse_msg = kcm_parse_func_strparser;
+ cb.read_sock_done = kcm_read_sock_done;
- INIT_WORK(&psock->rx_work, psock_rx_work);
- INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
+ err = strp_init(&psock->strp, csk, &cb);
+ if (err) {
+ kmem_cache_free(kcm_psockp, psock);
+ return err;
+ }
sock_hold(csk);
@@ -1726,9 +1414,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
csk->sk_user_data = psock;
- csk->sk_data_ready = psock_tcp_data_ready;
- csk->sk_write_space = psock_tcp_write_space;
- csk->sk_state_change = psock_tcp_state_change;
+ csk->sk_data_ready = psock_data_ready;
+ csk->sk_write_space = psock_write_space;
+ csk->sk_state_change = psock_state_change;
write_unlock_bh(&csk->sk_callback_lock);
/* Finished initialization, now add the psock to the MUX. */
@@ -1750,7 +1438,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
spin_unlock_bh(&mux->lock);
/* Schedule RX work in case there are already bytes queued */
- queue_work(kcm_wq, &psock->rx_work);
+ strp_check_rcv(&psock->strp);
return 0;
}
@@ -1790,6 +1478,8 @@ static void kcm_unattach(struct kcm_psock *psock)
struct sock *csk = psock->sk;
struct kcm_mux *mux = psock->mux;
+ lock_sock(csk);
+
/* Stop getting callbacks from TCP socket. After this there should
* be no way to reserve a kcm for this psock.
*/
@@ -1798,7 +1488,7 @@ static void kcm_unattach(struct kcm_psock *psock)
csk->sk_data_ready = psock->save_data_ready;
csk->sk_write_space = psock->save_write_space;
csk->sk_state_change = psock->save_state_change;
- psock->rx_stopped = 1;
+ strp_stop(&psock->strp);
if (WARN_ON(psock->rx_kcm)) {
write_unlock_bh(&csk->sk_callback_lock);
@@ -1821,18 +1511,17 @@ static void kcm_unattach(struct kcm_psock *psock)
write_unlock_bh(&csk->sk_callback_lock);
- del_timer_sync(&psock->rx_msg_timer);
- cancel_work_sync(&psock->rx_work);
- cancel_delayed_work_sync(&psock->rx_delayed_work);
+ /* Call strp_done without sock lock */
+ release_sock(csk);
+ strp_done(&psock->strp);
+ lock_sock(csk);
bpf_prog_put(psock->bpf_prog);
- kfree_skb(psock->rx_skb_head);
- psock->rx_skb_head = NULL;
-
spin_lock_bh(&mux->lock);
aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
+ save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
KCM_STATS_INCR(mux->stats.psock_unattach);
@@ -1875,6 +1564,8 @@ no_reserved:
fput(csk->sk_socket->file);
kmem_cache_free(kcm_psockp, psock);
}
+
+ release_sock(csk);
}
static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
@@ -1915,6 +1606,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
spin_unlock_bh(&mux->lock);
+ /* Lower socket lock should already be held */
kcm_unattach(psock);
err = 0;
@@ -2072,6 +1764,8 @@ static void release_mux(struct kcm_mux *mux)
aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
aggregate_psock_stats(&mux->aggregate_psock_stats,
&knet->aggregate_psock_stats);
+ aggregate_strp_stats(&mux->aggregate_strp_stats,
+ &knet->aggregate_strp_stats);
list_del_rcu(&mux->kcm_mux_list);
knet->count--;
mutex_unlock(&knet->mutex);
@@ -2151,6 +1845,13 @@ static int kcm_release(struct socket *sock)
* it will just return.
*/
__skb_queue_purge(&sk->sk_write_queue);
+
+ /* Set tx_stopped. This is checked when psock is bound to a kcm and we
+ * get a writespace callback. This prevents further work being queued
+ * from the callback (unbinding the psock occurs after canceling work.
+ */
+ kcm->tx_stopped = 1;
+
release_sock(sk);
spin_lock_bh(&mux->lock);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 232cb92033e8..34eff77982cf 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
return 1;
- if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+ if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
skb_pull(skb, 2);
return 0;
@@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
{
- static const unsigned char ppph[2] = { 0xff, 0x03 };
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error;
@@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
uhlen + session->hdr_len +
- sizeof(ppph) + total_len,
+ 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
0, GFP_KERNEL);
if (!skb)
goto error_put_sess_tun;
@@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
skb_reserve(skb, uhlen);
/* Add PPP header */
- skb->data[0] = ppph[0];
- skb->data[1] = ppph[1];
+ skb->data[0] = PPP_ALLSTATIONS;
+ skb->data[1] = PPP_UI;
skb_put(skb, 2);
/* Copy user data into skb */
@@ -369,7 +368,6 @@ error:
*/
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
struct l2tp_session *session;
@@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
sizeof(struct iphdr) + /* IP header */
uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */
session->hdr_len + /* L2TP header */
- sizeof(ppph); /* PPP header */
+ 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
if (skb_cow_head(skb, headroom))
goto abort_put_sess_tun;
/* Setup PPP header */
- __skb_push(skb, sizeof(ppph));
- skb->data[0] = ppph[0];
- skb->data[1] = ppph[1];
+ __skb_push(skb, 2);
+ skb->data[0] = PPP_ALLSTATIONS;
+ skb->data[1] = PPP_UI;
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session *session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
if (sock) {
- inet_shutdown(sock, 2);
+ inet_shutdown(sock, SEND_SHUTDOWN);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ba5fc1f01e53..42a41ae405ba 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1088,13 +1088,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
}
static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
- struct ieee80211_sta *sta)
+ struct sta_info *sta)
{
u32 ret = 0;
- trace_drv_get_expected_throughput(sta);
- if (local->ops->get_expected_throughput)
- ret = local->ops->get_expected_throughput(&local->hw, sta);
+ trace_drv_get_expected_throughput(&sta->sta);
+ if (local->ops->get_expected_throughput && sta->uploaded)
+ ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
trace_drv_return_u32(local, ret);
return ret;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 8f9c3bde835f..fa7d37cf0351 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -326,22 +326,33 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
u32 tx_time, estimated_retx;
u64 result;
- if (sta->mesh->fail_avg >= 100)
- return MAX_METRIC;
+ /* Try to get rate based on HW/SW RC algorithm.
+ * Rate is returned in units of Kbps, correct this
+ * to comply with airtime calculation units
+ * Round up in case we get rate < 100Kbps
+ */
+ rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
- rate = cfg80211_calculate_bitrate(&rinfo);
- if (WARN_ON(!rate))
- return MAX_METRIC;
+ if (rate) {
+ err = 0;
+ } else {
+ if (sta->mesh->fail_avg >= 100)
+ return MAX_METRIC;
- err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+ rate = cfg80211_calculate_bitrate(&rinfo);
+ if (WARN_ON(!rate))
+ return MAX_METRIC;
+
+ err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ }
/* bitrate is in units of 100 Kbps, while we need rate in units of
* 1Mbps. This will be corrected on tx_time computation.
*/
tx_time = (device_constant + 10 * test_frame_len / rate);
estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
- result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+ result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
return (u32)result;
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 76b737dcc36f..19f14c907d74 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2279,11 +2279,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
- /* check if the driver has a SW RC implementation */
- if (ref && ref->ops->get_expected_throughput)
- thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
- else
- thr = drv_get_expected_throughput(local, &sta->sta);
+ thr = sta_get_expected_throughput(sta);
if (thr != 0) {
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
@@ -2291,6 +2287,25 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
}
}
+u32 sta_get_expected_throughput(struct sta_info *sta)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct rate_control_ref *ref = NULL;
+ u32 thr = 0;
+
+ if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ ref = local->rate_ctrl;
+
+ /* check if the driver has a SW RC implementation */
+ if (ref && ref->ops->get_expected_throughput)
+ thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ else
+ thr = drv_get_expected_throughput(local, sta);
+
+ return thr;
+}
+
unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b0ef32dddd..0556be3e3628 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -712,6 +712,8 @@ void sta_set_rate_info_tx(struct sta_info *sta,
struct rate_info *rinfo);
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
+u32 sta_get_expected_throughput(struct sta_info *sta);
+
void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
unsigned long exp_time);
u8 sta_info_tx_streams(struct sta_info *sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 502396694f47..1d0746dfea57 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2334,7 +2334,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
const u8 *encaps_data;
int encaps_len, skip_header_bytes;
- int nh_pos, h_pos;
bool wme_sta = false, authorized = false;
bool tdls_peer;
bool multicast;
@@ -2640,13 +2639,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
encaps_len = 0;
}
- nh_pos = skb_network_header(skb) - skb->data;
- h_pos = skb_transport_header(skb) - skb->data;
-
skb_pull(skb, skip_header_bytes);
- nh_pos -= skip_header_bytes;
- h_pos -= skip_header_bytes;
-
head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
/*
@@ -2672,18 +2665,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
}
}
- if (encaps_data) {
+ if (encaps_data)
memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- nh_pos += encaps_len;
- h_pos += encaps_len;
- }
#ifdef CONFIG_MAC80211_MESH
- if (meshhdrlen > 0) {
+ if (meshhdrlen > 0)
memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
- nh_pos += meshhdrlen;
- h_pos += meshhdrlen;
- }
#endif
if (ieee80211_is_data_qos(fc)) {
@@ -2699,15 +2686,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
} else
memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
- nh_pos += hdrlen;
- h_pos += hdrlen;
-
- /* Update skb pointers to various headers since this modified frame
- * is going to go through Linux networking code that may potentially
- * need things like pointer to IP header. */
skb_reset_mac_header(skb);
- skb_set_network_header(skb, nh_pos);
- skb_set_transport_header(skb, h_pos);
info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
@@ -4390,9 +4369,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
int ac = ieee802_1d_to_ac[tid & 7];
skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
-
skb_set_queue_mapping(skb, ac);
skb->priority = tid;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8dd836a8dd60..3e3e2534478a 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -63,43 +63,75 @@ out_nlmsg_trim:
static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
int protocol, int s_num)
{
+ struct rhashtable_iter *hti = (void *)cb->args[2];
struct netlink_table *tbl = &nl_table[protocol];
- struct rhashtable *ht = &tbl->hash;
- const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
struct net *net = sock_net(skb->sk);
struct netlink_diag_req *req;
struct netlink_sock *nlsk;
struct sock *sk;
- int ret = 0, num = 0, i;
+ int num = 2;
+ int ret = 0;
req = nlmsg_data(cb->nlh);
- for (i = 0; i < htbl->size; i++) {
- struct rhash_head *pos;
+ if (s_num > 1)
+ goto mc_list;
- rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
- sk = (struct sock *)nlsk;
+ num--;
- if (!net_eq(sock_net(sk), net))
- continue;
- if (num < s_num) {
- num++;
+ if (!hti) {
+ hti = kmalloc(sizeof(*hti), GFP_KERNEL);
+ if (!hti)
+ return -ENOMEM;
+
+ cb->args[2] = (long)hti;
+ }
+
+ if (!s_num)
+ rhashtable_walk_enter(&tbl->hash, hti);
+
+ ret = rhashtable_walk_start(hti);
+ if (ret == -EAGAIN)
+ ret = 0;
+ if (ret)
+ goto stop;
+
+ while ((nlsk = rhashtable_walk_next(hti))) {
+ if (IS_ERR(nlsk)) {
+ ret = PTR_ERR(nlsk);
+ if (ret == -EAGAIN) {
+ ret = 0;
continue;
}
+ break;
+ }
- if (sk_diag_fill(sk, skb, req,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- sock_i_ino(sk)) < 0) {
- ret = 1;
- goto done;
- }
+ sk = (struct sock *)nlsk;
- num++;
+ if (!net_eq(sock_net(sk), net))
+ continue;
+
+ if (sk_diag_fill(sk, skb, req,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ sock_i_ino(sk)) < 0) {
+ ret = 1;
+ break;
}
}
+stop:
+ rhashtable_walk_stop(hti);
+ if (ret)
+ goto done;
+
+ rhashtable_walk_exit(hti);
+ cb->args[2] = 0;
+ num++;
+
+mc_list:
+ read_lock(&nl_table_lock);
sk_for_each_bound(sk, &tbl->mc_list) {
if (sk_hashed(sk))
continue;
@@ -116,13 +148,14 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
NLM_F_MULTI,
sock_i_ino(sk)) < 0) {
ret = 1;
- goto done;
+ break;
}
num++;
}
+ read_unlock(&nl_table_lock);
+
done:
cb->args[0] = num;
- cb->args[1] = protocol;
return ret;
}
@@ -131,20 +164,20 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_diag_req *req;
int s_num = cb->args[0];
+ int err = 0;
req = nlmsg_data(cb->nlh);
- rcu_read_lock();
- read_lock(&nl_table_lock);
-
if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
int i;
for (i = cb->args[1]; i < MAX_LINKS; i++) {
- if (__netlink_diag_dump(skb, cb, i, s_num))
+ err = __netlink_diag_dump(skb, cb, i, s_num);
+ if (err)
break;
s_num = 0;
}
+ cb->args[1] = i;
} else {
if (req->sdiag_protocol >= MAX_LINKS) {
read_unlock(&nl_table_lock);
@@ -152,13 +185,22 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
return -ENOENT;
}
- __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
+ err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
}
- read_unlock(&nl_table_lock);
- rcu_read_unlock();
+ return err < 0 ? err : skb->len;
+}
+
+static int netlink_diag_dump_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *hti = (void *)cb->args[2];
+
+ if (cb->args[0] == 1)
+ rhashtable_walk_exit(hti);
- return skb->len;
+ kfree(hti);
+
+ return 0;
}
static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
@@ -172,6 +214,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
if (h->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = netlink_diag_dump,
+ .done = netlink_diag_dump_done,
};
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
} else
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 046f7508c06b..45ac8e8e58f4 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
void rds_ib_state_change(struct sock *sk);
int rds_ib_listen_init(void);
void rds_ib_listen_stop(void);
+__printf(2, 3)
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b2d17f0fafa8..fd0bccb2f9f9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...);
#define rds_conn_error(conn, fmt...) \
__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
+__printf(2, 3)
void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
#define rds_conn_path_error(cp, fmt...) \
__rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 88effadd4b16..c7cf356b42b8 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -22,6 +22,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#define CREATE_TRACE_POINTS
#include "ar-internal.h"
MODULE_DESCRIPTION("RxRPC network protocol");
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ff83fb1ddd47..c761124961cc 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -255,6 +255,9 @@ enum rxrpc_conn_flag {
RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */
+ RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */
+ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
+ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
};
/*
@@ -265,6 +268,17 @@ enum rxrpc_conn_event {
};
/*
+ * The connection cache state.
+ */
+enum rxrpc_conn_cache_state {
+ RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */
+ RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */
+ RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */
+ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */
+ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */
+};
+
+/*
* The connection protocol state.
*/
enum rxrpc_conn_proto_state {
@@ -276,6 +290,7 @@ enum rxrpc_conn_proto_state {
RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */
+ RXRPC_CONN_LOCAL_ERROR, /* Conn terminated by local error */
RXRPC_CONN__NR_STATES
};
@@ -288,23 +303,33 @@ struct rxrpc_connection {
struct rxrpc_conn_proto proto;
struct rxrpc_conn_parameters params;
- spinlock_t channel_lock;
+ atomic_t usage;
+ struct rcu_head rcu;
+ struct list_head cache_link;
+ spinlock_t channel_lock;
+ unsigned char active_chans; /* Mask of active channels */
+#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1)
+ struct list_head waiting_calls; /* Calls waiting for channels */
struct rxrpc_channel {
struct rxrpc_call __rcu *call; /* Active call */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
u32 last_call; /* ID of last call */
- u32 last_result; /* Result of last call (0/abort) */
+ u8 last_type; /* Type of last packet */
+ u16 last_service_id;
+ union {
+ u32 last_seq;
+ u32 last_abort;
+ };
} channels[RXRPC_MAXCALLS];
- wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
- struct rcu_head rcu;
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
struct rb_node service_node; /* Node in peer->service_conns */
};
+ struct list_head proc_link; /* link in procfs list */
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
@@ -313,17 +338,16 @@ struct rxrpc_connection {
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
unsigned long events;
- unsigned long put_time; /* Time at which last put */
+ unsigned long idle_timestamp; /* Time at which last became idle */
spinlock_t state_lock; /* state-change lock */
- atomic_t usage;
+ enum rxrpc_conn_cache_state cache_state : 8;
enum rxrpc_conn_proto_state state : 8; /* current state of connection */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
int error; /* local error incurred */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
- atomic_t hi_serial; /* highest serial number received */
- atomic_t avail_chans; /* number of channels available */
+ unsigned int hi_serial; /* highest serial number received */
u8 size_align; /* data size alignment (for security) */
u8 header_size; /* rxrpc + security header size */
u8 security_size; /* security header size */
@@ -341,10 +365,11 @@ enum rxrpc_call_flag {
RXRPC_CALL_RCVD_LAST, /* all packets received */
RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */
RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */
- RXRPC_CALL_PROC_BUSY, /* the processor is busy */
RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */
RXRPC_CALL_HAS_USERID, /* has a user ID attached */
RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */
+ RXRPC_CALL_IS_SERVICE, /* Call is service call */
+ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
};
/*
@@ -402,6 +427,7 @@ enum rxrpc_call_state {
struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock *socket; /* socket responsible */
struct timer_list lifetimer; /* lifetime remaining on call */
struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */
@@ -410,13 +436,14 @@ struct rxrpc_call {
struct work_struct destroyer; /* call destroyer */
struct work_struct processor; /* packet processor and ACK generator */
struct list_head link; /* link in master call list */
+ struct list_head chan_wait_link; /* Link in conn->waiting_calls */
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* calls awaiting acceptance */
struct rb_node sock_node; /* node in socket call tree */
struct sk_buff_head rx_queue; /* received packets */
struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
- wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ wait_queue_head_t waitq; /* Wait queue for channel or Tx */
__be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long creation_jif; /* time of call creation */
@@ -432,8 +459,10 @@ struct rxrpc_call {
int error_report; /* Network error (ICMP/local transport) */
int error; /* Local error incurred */
enum rxrpc_call_state state : 8; /* current state of call */
+ u16 service_id; /* service ID */
+ u32 call_id; /* call ID on connection */
+ u32 cid; /* connection ID plus channel index */
int debug_id; /* debug ID for printks */
- u8 channel; /* connection channel occupied by this call */
/* transmission-phase ACK management */
u8 acks_head; /* offset into window of first entry */
@@ -455,19 +484,13 @@ struct rxrpc_call {
rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
atomic_t ackr_not_idle; /* number of packets in Rx queue */
/* received packet records, 1 bit per record */
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
-
- u8 in_clientflag; /* Copy of conn->in_clientflag */
- struct rxrpc_local *local; /* Local endpoint. */
- u32 call_id; /* call ID on connection */
- u32 cid; /* connection ID plus channel index */
- u32 epoch; /* epoch of this connection */
- u16 service_id; /* service ID */
};
/*
@@ -484,6 +507,8 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
write_unlock_bh(&call->state_lock);
}
+#include <trace/events/rxrpc.h>
+
/*
* af_rxrpc.c
*/
@@ -502,8 +527,8 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
+void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool);
void rxrpc_process_call(struct work_struct *);
/*
@@ -528,15 +553,32 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
void __rxrpc_put_call(struct rxrpc_call *);
void __exit rxrpc_destroy_all_calls(void);
+static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
+{
+ return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags);
+}
+
+static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
+{
+ return !rxrpc_is_service_call(call);
+}
+
/*
* conn_client.c
*/
+extern unsigned int rxrpc_max_client_connections;
+extern unsigned int rxrpc_reap_client_connections;
+extern unsigned int rxrpc_conn_idle_client_expiry;
+extern unsigned int rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *, gfp_t);
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
+void rxrpc_expose_client_call(struct rxrpc_call *);
+void rxrpc_disconnect_client_call(struct rxrpc_call *);
+void rxrpc_put_client_conn(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_client_connections(void);
/*
* conn_event.c
@@ -550,15 +592,17 @@ void rxrpc_reject_packets(struct rxrpc_local *);
*/
extern unsigned int rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
+extern struct list_head rxrpc_connection_proc_list;
extern rwlock_t rxrpc_connection_lock;
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
struct sk_buff *);
-void __rxrpc_disconnect_call(struct rxrpc_call *);
+void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_put_connection(struct rxrpc_connection *);
+void rxrpc_kill_connection(struct rxrpc_connection *);
+void __rxrpc_put_connection(struct rxrpc_connection *);
void __exit rxrpc_destroy_all_connections(void);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
@@ -582,6 +626,21 @@ struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *con
return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
}
+static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+ if (!conn)
+ return;
+
+ if (rxrpc_conn_is_client(conn)) {
+ if (atomic_dec_and_test(&conn->usage))
+ rxrpc_put_client_conn(conn);
+ } else {
+ if (atomic_dec_return(&conn->usage) == 1)
+ __rxrpc_put_connection(conn);
+ }
+}
+
+
static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
{
if (!rxrpc_get_connection_maybe(conn))
@@ -697,9 +756,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
struct sockaddr_rxrpc *, gfp_t);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
-static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
+static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
{
atomic_inc(&peer->usage);
+ return peer;
}
static inline
@@ -747,6 +807,11 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *);
* skbuff.c
*/
void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_new_skb(struct sk_buff *);
+void rxrpc_see_skb(struct sk_buff *);
+void rxrpc_get_skb(struct sk_buff *);
+void rxrpc_free_skb(struct sk_buff *);
+void rxrpc_purge_queue(struct sk_buff_head *);
/*
* sysctl.c
@@ -894,44 +959,6 @@ do { \
#endif /* __KDEBUGALL */
-/*
- * socket buffer accounting / leak finding
- */
-static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
-{
- //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_inc(&rxrpc_n_skbs);
-}
-
-#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
-
-static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
-{
- //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_dec(&rxrpc_n_skbs);
-}
-
-#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
-
-static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
-{
- if (skb) {
- CHECK_SLAB_OKAY(&skb->users);
- //_net("free skb %p %s [%d]",
- // skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_dec(&rxrpc_n_skbs);
- kfree_skb(skb);
- }
-}
-
-#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
-
-static inline void rxrpc_purge_queue(struct sk_buff_head *list)
-{
- struct sk_buff *skb;
- while ((skb = skb_dequeue((list))) != NULL)
- rxrpc_free_skb(skb);
-}
#define rxrpc_get_call(CALL) \
do { \
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 9bae21e66d65..669ac79d3b44 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -203,6 +203,7 @@ void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
_net("incoming call skb %p", skb);
+ rxrpc_see_skb(skb);
sp = rxrpc_skb(skb);
/* Set up a response packet header in case we need it */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e60cf65c2232..5292bcfd8816 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -25,7 +25,7 @@
* propose an ACK be sent
*/
void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate)
+ u16 skew, u32 serial, bool immediate)
{
unsigned long expiry;
s8 prior = rxrpc_ack_priority[ack_reason];
@@ -44,8 +44,10 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
/* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
* numbers */
if (prior == rxrpc_ack_priority[call->ackr_reason]) {
- if (prior <= 4)
+ if (prior <= 4) {
+ call->ackr_skew = skew;
call->ackr_serial = serial;
+ }
if (immediate)
goto cancel_timer;
return;
@@ -103,13 +105,13 @@ cancel_timer:
* propose an ACK be sent, locking the call structure
*/
void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate)
+ u16 skew, u32 serial, bool immediate)
{
s8 prior = rxrpc_ack_priority[ack_reason];
if (prior > rxrpc_ack_priority[call->ackr_reason]) {
spin_lock_bh(&call->lock);
- __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
+ __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate);
spin_unlock_bh(&call->lock);
}
}
@@ -191,6 +193,8 @@ static void rxrpc_resend(struct rxrpc_call *call)
stop = true;
sp->resend_at = jiffies + 3;
} else {
+ if (rxrpc_is_client_call(call))
+ rxrpc_expose_client_call(call);
sp->resend_at =
jiffies + rxrpc_resend_timeout;
}
@@ -376,7 +380,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
call->acks_hard++;
}
- wake_up(&call->tx_waitq);
+ wake_up(&call->waitq);
}
/*
@@ -407,6 +411,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
skb = skb_dequeue(&call->rx_oos_queue);
if (skb) {
+ rxrpc_see_skb(skb);
sp = rxrpc_skb(skb);
_debug("drain OOS packet %d [%d]",
@@ -427,6 +432,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
/* find out what the next packet is */
skb = skb_peek(&call->rx_oos_queue);
+ rxrpc_see_skb(skb);
if (skb)
call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
else
@@ -576,6 +582,7 @@ process_further:
if (!skb)
return -EAGAIN;
+ rxrpc_see_skb(skb);
_net("deferred skb %p", skb);
sp = rxrpc_skb(skb);
@@ -625,7 +632,7 @@ process_further:
if (ack.reason == RXRPC_ACK_PING) {
_proto("Rx ACK %%%u PING Request", latest);
rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
- sp->hdr.serial, true);
+ skb->priority, sp->hdr.serial, true);
}
/* discard any out-of-order or duplicate ACKs */
@@ -832,11 +839,6 @@ void rxrpc_process_call(struct work_struct *work)
call->debug_id, rxrpc_call_states[call->state], call->events,
(jiffies - call->creation_jif) / (HZ / 10));
- if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
- _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
- return;
- }
-
if (!call->conn)
goto skip_msg_init;
@@ -1155,8 +1157,7 @@ skip_msg_init:
goto maybe_reschedule;
send_ACK_with_skew:
- ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
- ntohl(ack.serial));
+ ack.maxSkew = htons(call->ackr_skew);
send_ACK:
mtu = call->conn->params.peer->if_mtu;
mtu -= call->conn->params.peer->hdrsize;
@@ -1246,7 +1247,8 @@ send_message_2:
case RXRPC_CALL_SERVER_ACK_REQUEST:
_debug("start ACK timer");
rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
- call->ackr_serial, false);
+ call->ackr_skew, call->ackr_serial,
+ false);
default:
break;
}
@@ -1281,7 +1283,6 @@ maybe_reschedule:
}
error:
- clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
kfree(acks);
/* because we don't want two CPUs both processing the work item for one
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ae057e0740f3..e7cbcc4a87cf 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -127,10 +127,11 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
INIT_WORK(&call->processor, &rxrpc_process_call);
INIT_LIST_HEAD(&call->link);
+ INIT_LIST_HEAD(&call->chan_wait_link);
INIT_LIST_HEAD(&call->accept_link);
skb_queue_head_init(&call->rx_queue);
skb_queue_head_init(&call->rx_oos_queue);
- init_waitqueue_head(&call->tx_waitq);
+ init_waitqueue_head(&call->waitq);
spin_lock_init(&call->lock);
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
@@ -167,10 +168,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
sock_hold(&rx->sk);
call->socket = rx;
call->rx_data_post = 1;
-
- call->local = rx->local;
call->service_id = srx->srx_service;
- call->in_clientflag = 0;
_leave(" = %p", call);
return call;
@@ -318,11 +316,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
chan = sp->hdr.cid & RXRPC_CHANNELMASK;
candidate->socket = rx;
candidate->conn = conn;
+ candidate->peer = conn->params.peer;
candidate->cid = sp->hdr.cid;
candidate->call_id = sp->hdr.callNumber;
- candidate->channel = chan;
candidate->rx_data_post = 0;
candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+ candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE);
if (conn->security_ix > 0)
candidate->state = RXRPC_CALL_SERVER_SECURING;
@@ -332,7 +331,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
call = rcu_dereference_protected(conn->channels[chan].call,
lockdep_is_held(&conn->channel_lock));
- _debug("channel[%u] is %p", candidate->channel, call);
+ _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call);
if (call && call->call_id == sp->hdr.callNumber) {
/* already set; must've been a duplicate packet */
_debug("extant call [%d]", call->state);
@@ -360,7 +359,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
call->debug_id, rxrpc_call_states[call->state]);
if (call->state >= RXRPC_CALL_COMPLETE) {
- __rxrpc_disconnect_call(call);
+ __rxrpc_disconnect_call(conn, call);
} else {
spin_unlock(&conn->channel_lock);
kmem_cache_free(rxrpc_call_jar, candidate);
@@ -387,6 +386,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
rcu_assign_pointer(conn->channels[chan].call, call);
sock_hold(&rx->sk);
rxrpc_get_connection(conn);
+ rxrpc_get_peer(call->peer);
spin_unlock(&conn->channel_lock);
spin_lock(&conn->params.peer->lock);
@@ -397,10 +397,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
- call->local = conn->params.local;
- call->epoch = conn->proto.epoch;
call->service_id = conn->params.service_id;
- call->in_clientflag = RXRPC_CLIENT_INITIATED;
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -569,12 +566,6 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
read_lock_bh(&rx->call_lock);
- /* mark all the calls as no longer wanting incoming packets */
- for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
- rxrpc_mark_call_released(call);
- }
-
/* kill the not-yet-accepted incoming calls */
list_for_each_entry(call, &rx->secureq, accept_link) {
rxrpc_mark_call_released(call);
@@ -584,6 +575,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
rxrpc_mark_call_released(call);
}
+ /* mark all the calls as no longer wanting incoming packets */
+ for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+ rxrpc_mark_call_released(call);
+ }
+
read_unlock_bh(&rx->call_lock);
_leave("");
}
@@ -616,6 +613,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
rxrpc_purge_queue(&call->rx_queue);
+ rxrpc_put_peer(call->peer);
kmem_cache_free(rxrpc_call_jar, call);
}
@@ -682,8 +680,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, destroyer);
- _enter("%p{%d,%d,%p}",
- call, atomic_read(&call->usage), call->channel, call->conn);
+ _enter("%p{%d,%x,%p}",
+ call, atomic_read(&call->usage), call->cid, call->conn);
ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 9e91f27b0d0f..349402b08e5a 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -7,6 +7,68 @@
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * Client connections need to be cached for a little while after they've made a
+ * call so as to handle retransmitted DATA packets in case the server didn't
+ * receive the final ACK or terminating ABORT we sent it.
+ *
+ * Client connections can be in one of a number of cache states:
+ *
+ * (1) INACTIVE - The connection is not held in any list and may not have been
+ * exposed to the world. If it has been previously exposed, it was
+ * discarded from the idle list after expiring.
+ *
+ * (2) WAITING - The connection is waiting for the number of client conns to
+ * drop below the maximum capacity. Calls may be in progress upon it from
+ * when it was active and got culled.
+ *
+ * The connection is on the rxrpc_waiting_client_conns list which is kept
+ * in to-be-granted order. Culled conns with waiters go to the back of
+ * the queue just like new conns.
+ *
+ * (3) ACTIVE - The connection has at least one call in progress upon it, it
+ * may freely grant available channels to new calls and calls may be
+ * waiting on it for channels to become available.
+ *
+ * The connection is on the rxrpc_active_client_conns list which is kept
+ * in activation order for culling purposes.
+ *
+ * rxrpc_nr_active_client_conns is held incremented also.
+ *
+ * (4) CULLED - The connection got summarily culled to try and free up
+ * capacity. Calls currently in progress on the connection are allowed to
+ * continue, but new calls will have to wait. There can be no waiters in
+ * this state - the conn would have to go to the WAITING state instead.
+ *
+ * (5) IDLE - The connection has no calls in progress upon it and must have
+ * been exposed to the world (ie. the EXPOSED flag must be set). When it
+ * expires, the EXPOSED flag is cleared and the connection transitions to
+ * the INACTIVE state.
+ *
+ * The connection is on the rxrpc_idle_client_conns list which is kept in
+ * order of how soon they'll expire.
+ *
+ * There are flags of relevance to the cache:
+ *
+ * (1) EXPOSED - The connection ID got exposed to the world. If this flag is
+ * set, an extra ref is added to the connection preventing it from being
+ * reaped when it has no calls outstanding. This flag is cleared and the
+ * ref dropped when a conn is discarded from the idle list.
+ *
+ * This allows us to move terminal call state retransmission to the
+ * connection and to discard the call immediately we think it is done
+ * with. It also give us a chance to reuse the connection.
+ *
+ * (2) DONT_REUSE - The connection should be discarded as soon as possible and
+ * should not be reused. This is set when an exclusive connection is used
+ * or a call ID counter overflows.
+ *
+ * The caching state may only be changed if the cache lock is held.
+ *
+ * There are two idle client connection expiry durations. If the total number
+ * of connections is below the reap threshold, we use the normal duration; if
+ * it's above, we use the fast duration.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -16,22 +78,37 @@
#include <linux/timer.h>
#include "ar-internal.h"
+__read_mostly unsigned int rxrpc_max_client_connections = 1000;
+__read_mostly unsigned int rxrpc_reap_client_connections = 900;
+__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+
+static unsigned int rxrpc_nr_client_conns;
+static unsigned int rxrpc_nr_active_client_conns;
+static __read_mostly bool rxrpc_kill_all_client_conns;
+
+static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
+static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
+static LIST_HEAD(rxrpc_waiting_client_conns);
+static LIST_HEAD(rxrpc_active_client_conns);
+static LIST_HEAD(rxrpc_idle_client_conns);
+
/*
* We use machine-unique IDs for our client connections.
*/
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+static void rxrpc_cull_active_client_conns(void);
+static void rxrpc_discard_expired_client_conns(struct work_struct *);
+
+static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
+ rxrpc_discard_expired_client_conns);
+
/*
* Get a connection ID and epoch for a client connection from the global pool.
* The connection struct pointer is then recorded in the idr radix tree. The
* epoch is changed if this wraps.
- *
- * TODO: The IDR tree gets very expensive on memory if the connection IDs are
- * widely scattered throughout the number space, so we shall need to retire
- * connections that have, say, an ID more than four times the maximum number of
- * client conns away from the current allocation point to try and keep the IDs
- * concentrated. We will also need to retire connections from an old epoch.
*/
static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
gfp_t gfp)
@@ -114,8 +191,7 @@ void rxrpc_destroy_client_conn_ids(void)
}
/*
- * Allocate a client connection. The caller must take care to clear any
- * padding bytes in *cp.
+ * Allocate a client connection.
*/
static struct rxrpc_connection *
rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
@@ -131,6 +207,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
return ERR_PTR(-ENOMEM);
}
+ atomic_set(&conn->usage, 1);
+ if (conn->params.exclusive)
+ __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+
conn->params = *cp;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
conn->state = RXRPC_CONN_CLIENT;
@@ -148,7 +228,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
goto error_2;
write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
+ list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
write_unlock(&rxrpc_connection_lock);
/* We steal the caller's peer ref. */
@@ -170,32 +250,68 @@ error_0:
}
/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Determine if a connection may be reused.
*/
-int rxrpc_connect_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
+{
+ int id_cursor, id, distance, limit;
+
+ if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
+ goto dont_reuse;
+
+ if (conn->proto.epoch != rxrpc_epoch)
+ goto mark_dont_reuse;
+
+ /* The IDR tree gets very expensive on memory if the connection IDs are
+ * widely scattered throughout the number space, so we shall want to
+ * kill off connections that, say, have an ID more than about four
+ * times the maximum number of client conns away from the current
+ * allocation point to try and keep the IDs concentrated.
+ */
+ id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+ id = conn->proto.cid >> RXRPC_CIDSHIFT;
+ distance = id - id_cursor;
+ if (distance < 0)
+ distance = -distance;
+ limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+ if (distance > limit)
+ goto mark_dont_reuse;
+
+ return true;
+
+mark_dont_reuse:
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+dont_reuse:
+ return false;
+}
+
+/*
+ * Create or find a client connection to use for a call.
+ *
+ * If we return with a connection, the call will be on its waiting list. It's
+ * left to the caller to assign a channel and wake up the call.
+ */
+static int rxrpc_get_client_conn(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
{
struct rxrpc_connection *conn, *candidate = NULL;
struct rxrpc_local *local = cp->local;
struct rb_node *p, **pp, *parent;
long diff;
- int chan;
-
- DECLARE_WAITQUEUE(myself, current);
+ int ret = -ENOMEM;
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
if (!cp->peer)
- return -ENOMEM;
+ goto error;
+ /* If the connection is not meant to be exclusive, search the available
+ * connections to see if the connection we want to use already exists.
+ */
if (!cp->exclusive) {
- /* Search for a existing client connection unless this is going
- * to be a connection that's used exclusively for a single call.
- */
_debug("search 1");
spin_lock(&local->client_conns_lock);
p = local->client_conns.rb_node;
@@ -206,39 +322,55 @@ int rxrpc_connect_call(struct rxrpc_call *call,
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level));
- if (diff < 0)
+#undef cmp
+ if (diff < 0) {
p = p->rb_left;
- else if (diff > 0)
+ } else if (diff > 0) {
p = p->rb_right;
- else
- goto found_extant_conn;
+ } else {
+ if (rxrpc_may_reuse_conn(conn) &&
+ rxrpc_get_connection_maybe(conn))
+ goto found_extant_conn;
+ /* The connection needs replacing. It's better
+ * to effect that when we have something to
+ * replace it with so that we don't have to
+ * rebalance the tree twice.
+ */
+ break;
+ }
}
spin_unlock(&local->client_conns_lock);
}
- /* We didn't find a connection or we want an exclusive one. */
- _debug("get new conn");
+ /* There wasn't a connection yet or we need an exclusive connection.
+ * We need to create a candidate and then potentially redo the search
+ * in case we're racing with another thread also trying to connect on a
+ * shareable connection.
+ */
+ _debug("new conn");
candidate = rxrpc_alloc_client_connection(cp, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
+ if (IS_ERR(candidate)) {
+ ret = PTR_ERR(candidate);
+ goto error_peer;
}
+ /* Add the call to the new connection's waiting list in case we're
+ * going to have to wait for the connection to come live. It's our
+ * connection, so we want first dibs on the channel slots. We would
+ * normally have to take channel_lock but we do this before anyone else
+ * can see the connection.
+ */
+ list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+
if (cp->exclusive) {
- /* Assign the call on an exclusive connection to channel 0 and
- * don't add the connection to the endpoint's shareable conn
- * lookup tree.
- */
- _debug("exclusive chan 0");
- conn = candidate;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- chan = 0;
- goto found_channel;
+ call->conn = candidate;
+ _leave(" = 0 [exclusive %d]", candidate->debug_id);
+ return 0;
}
- /* We need to redo the search before attempting to add a new connection
- * lest we race with someone else adding a conflicting instance.
+ /* Publish the new connection for userspace to find. We need to redo
+ * the search before doing this lest we race with someone else adding a
+ * conflicting instance.
*/
_debug("search 2");
spin_lock(&local->client_conns_lock);
@@ -249,124 +381,672 @@ int rxrpc_connect_call(struct rxrpc_call *call,
parent = *pp;
conn = rb_entry(parent, struct rxrpc_connection, client_node);
+#define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level));
- if (diff < 0)
+#undef cmp
+ if (diff < 0) {
pp = &(*pp)->rb_left;
- else if (diff > 0)
+ } else if (diff > 0) {
pp = &(*pp)->rb_right;
- else
- goto found_extant_conn;
+ } else {
+ if (rxrpc_may_reuse_conn(conn) &&
+ rxrpc_get_connection_maybe(conn))
+ goto found_extant_conn;
+ /* The old connection is from an outdated epoch. */
+ _debug("replace conn");
+ clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+ rb_replace_node(&conn->client_node,
+ &candidate->client_node,
+ &local->client_conns);
+ goto candidate_published;
+ }
}
- /* The second search also failed; simply add the new connection with
- * the new call in channel 0. Note that we need to take the channel
- * lock before dropping the client conn lock.
- */
_debug("new conn");
- set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
rb_link_node(&candidate->client_node, parent, pp);
rb_insert_color(&candidate->client_node, &local->client_conns);
-attached:
- conn = candidate;
- candidate = NULL;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
+candidate_published:
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ call->conn = candidate;
spin_unlock(&local->client_conns_lock);
- chan = 0;
+ _leave(" = 0 [new %d]", candidate->debug_id);
+ return 0;
-found_channel:
- _debug("found chan");
- call->conn = conn;
- call->channel = chan;
- call->epoch = conn->proto.epoch;
- call->cid = conn->proto.cid | chan;
- call->call_id = ++conn->channels[chan].call_counter;
- conn->channels[chan].call_id = call->call_id;
- rcu_assign_pointer(conn->channels[chan].call, call);
+ /* We come here if we found a suitable connection already in existence.
+ * Discard any candidate we may have allocated, and try to get a
+ * channel on this one.
+ */
+found_extant_conn:
+ _debug("found conn");
+ spin_unlock(&local->client_conns_lock);
- _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+ rxrpc_put_connection(candidate);
+ candidate = NULL;
+ spin_lock(&conn->channel_lock);
+ call->conn = conn;
+ list_add(&call->chan_wait_link, &conn->waiting_calls);
spin_unlock(&conn->channel_lock);
+ _leave(" = 0 [extant %d]", conn->debug_id);
+ return 0;
+
+error_peer:
rxrpc_put_peer(cp->peer);
cp->peer = NULL;
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
- /* We found a potentially suitable connection already in existence. If
- * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
- * reaper), discard any candidate we may have allocated, and try to get
- * a channel on this one, otherwise we have to replace it.
- */
-found_extant_conn:
- _debug("found conn");
- if (!rxrpc_get_connection_maybe(conn)) {
- set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
- rb_replace_node(&conn->client_node,
- &candidate->client_node,
- &local->client_conns);
- clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
- goto attached;
+/*
+ * Activate a connection.
+ */
+static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+{
+ conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+ rxrpc_nr_active_client_conns++;
+ list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+}
+
+/*
+ * Attempt to animate a connection for a new call.
+ *
+ * If it's not exclusive, the connection is in the endpoint tree, and we're in
+ * the conn's list of those waiting to grab a channel. There is, however, a
+ * limit on the number of live connections allowed at any one time, so we may
+ * have to wait for capacity to become available.
+ *
+ * Note that a connection on the waiting queue might *also* have active
+ * channels if it has been culled to make space and then re-requested by a new
+ * call.
+ */
+static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+{
+ unsigned int nr_conns;
+
+ _enter("%d,%d", conn->debug_id, conn->cache_state);
+
+ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+ goto out;
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ nr_conns = rxrpc_nr_client_conns;
+ if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags))
+ rxrpc_nr_client_conns = nr_conns + 1;
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ case RXRPC_CONN_CLIENT_WAITING:
+ break;
+
+ case RXRPC_CONN_CLIENT_INACTIVE:
+ case RXRPC_CONN_CLIENT_CULLED:
+ case RXRPC_CONN_CLIENT_IDLE:
+ if (nr_conns >= rxrpc_max_client_connections)
+ goto wait_for_capacity;
+ goto activate_conn;
+
+ default:
+ BUG();
}
- spin_unlock(&local->client_conns_lock);
+out_unlock:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+out:
+ _leave(" [%d]", conn->cache_state);
+ return;
- rxrpc_put_connection(candidate);
+activate_conn:
+ _debug("activate");
+ rxrpc_activate_conn(conn);
+ goto out_unlock;
+
+wait_for_capacity:
+ _debug("wait");
+ conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+ list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+ goto out_unlock;
+}
+
+/*
+ * Deactivate a channel.
+ */
+static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ rcu_assign_pointer(chan->call, NULL);
+ conn->active_chans &= ~(1 << channel);
+}
+
+/*
+ * Assign a channel to the call at the front of the queue and wake the call up.
+ * We don't increment the callNumber counter until this number has been exposed
+ * to the world.
+ */
+static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_call *call = list_entry(conn->waiting_calls.next,
+ struct rxrpc_call, chan_wait_link);
+ u32 call_id = chan->call_counter + 1;
+
+ list_del_init(&call->chan_wait_link);
+ conn->active_chans |= 1 << channel;
+ call->peer = rxrpc_get_peer(conn->params.peer);
+ call->cid = conn->proto.cid | channel;
+ call->call_id = call_id;
+
+ _net("CONNECT call %08x:%08x as call %d on conn %d",
+ call->cid, call->call_id, call->debug_id, conn->debug_id);
+
+ /* Paired with the read barrier in rxrpc_wait_for_channel(). This
+ * orders cid and epoch in the connection wrt to call_id without the
+ * need to take the channel_lock.
+ *
+ * We provisionally assign a callNumber at this point, but we don't
+ * confirm it until the call is about to be exposed.
+ *
+ * TODO: Pair with a barrier in the data_ready handler when that looks
+ * at the call ID through a connection channel.
+ */
+ smp_wmb();
+ chan->call_id = call_id;
+ rcu_assign_pointer(chan->call, call);
+ wake_up(&call->waitq);
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls.
+ */
+static void rxrpc_activate_channels(struct rxrpc_connection *conn)
+{
+ unsigned char mask;
+
+ _enter("%d", conn->debug_id);
+
+ if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE ||
+ conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
+ return;
+
+ spin_lock(&conn->channel_lock);
+
+ while (!list_empty(&conn->waiting_calls) &&
+ (mask = ~conn->active_chans,
+ mask &= RXRPC_ACTIVE_CHANS_MASK,
+ mask != 0))
+ rxrpc_activate_one_channel(conn, __ffs(mask));
+
+ spin_unlock(&conn->channel_lock);
+ _leave("");
+}
+
+/*
+ * Wait for a callNumber and a channel to be granted to a call.
+ */
+static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
+{
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (!call->call_id) {
+ DECLARE_WAITQUEUE(myself, current);
- if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_put_connection(conn);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
- add_wait_queue(&conn->channel_wq, &myself);
+ add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_add_unless(&conn->avail_chans, -1, 0))
+ if (call->call_id)
break;
- if (signal_pending(current))
- goto interrupted;
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
schedule();
}
- remove_wait_queue(&conn->channel_wq, &myself);
+ remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
}
- /* The connection allegedly now has a free channel and we can now
- * attach the call to it.
- */
+ /* Paired with the write barrier in rxrpc_activate_one_channel(). */
+ smp_rmb();
+
+out:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ int ret;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ rxrpc_discard_expired_client_conns(NULL);
+ rxrpc_cull_active_client_conns();
+
+ ret = rxrpc_get_client_conn(call, cp, srx, gfp);
+ if (ret < 0)
+ return ret;
+
+ rxrpc_animate_client_conn(call->conn);
+ rxrpc_activate_channels(call->conn);
+
+ ret = rxrpc_wait_for_channel(call, gfp);
+ if (ret < 0)
+ rxrpc_disconnect_client_call(call);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Note that a connection is about to be exposed to the world. Once it is
+ * exposed, we maintain an extra ref on it that stops it from being summarily
+ * discarded before it's (a) had a chance to deal with retransmission and (b)
+ * had a chance at re-use (the per-connection security negotiation is
+ * expensive).
+ */
+static void rxrpc_expose_client_conn(struct rxrpc_connection *conn)
+{
+ if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+ rxrpc_get_connection(conn);
+}
+
+/*
+ * Note that a call, and thus a connection, is about to be exposed to the
+ * world.
+ */
+void rxrpc_expose_client_call(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan =
+ &conn->channels[call->cid & RXRPC_CHANNELMASK];
+
+ if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ /* Mark the call ID as being used. If the callNumber counter
+ * exceeds ~2 billion, we kill the connection after its
+ * outstanding calls have finished so that the counter doesn't
+ * wrap.
+ */
+ chan->call_counter++;
+ if (chan->call_counter >= INT_MAX)
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ rxrpc_expose_client_conn(conn);
+ }
+}
+
+/*
+ * Disconnect a client call.
+ */
+void rxrpc_disconnect_client_call(struct rxrpc_call *call)
+{
+ unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ call->conn = NULL;
+
spin_lock(&conn->channel_lock);
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan].call)
- goto found_channel;
- BUG();
+ /* Calls that have never actually been assigned a channel can simply be
+ * discarded. If the conn didn't get used either, it will follow
+ * immediately unless someone else grabs it in the meantime.
+ */
+ if (!list_empty(&call->chan_wait_link)) {
+ _debug("call is waiting");
+ ASSERTCMP(call->call_id, ==, 0);
+ ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+ list_del_init(&call->chan_wait_link);
+
+ /* We must deactivate or idle the connection if it's now
+ * waiting for nothing.
+ */
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
+ list_empty(&conn->waiting_calls) &&
+ !conn->active_chans)
+ goto idle_connection;
+ goto out;
+ }
+
+ ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+ ASSERTCMP(atomic_read(&conn->usage), >=, 2);
+
+ /* If a client call was exposed to the world, we save the result for
+ * retransmission.
+ *
+ * We use a barrier here so that the call number and abort code can be
+ * read without needing to take a lock.
+ *
+ * TODO: Make the incoming packet handler check this and handle
+ * terminal retransmission without requiring access to the call.
+ */
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ _debug("exposed %u,%u", call->call_id, call->local_abort);
+ __rxrpc_disconnect_call(conn, call);
+ }
+
+ /* See if we can pass the channel directly to another call. */
+ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE &&
+ !list_empty(&conn->waiting_calls)) {
+ _debug("pass chan");
+ rxrpc_activate_one_channel(conn, channel);
+ goto out_2;
+ }
+
+ /* Things are more complex and we need the cache lock. We might be
+ * able to simply idle the conn or it might now be lurking on the wait
+ * list. It might even get moved back to the active list whilst we're
+ * waiting for the lock.
+ */
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ if (list_empty(&conn->waiting_calls)) {
+ rxrpc_deactivate_one_channel(conn, channel);
+ if (!conn->active_chans) {
+ rxrpc_nr_active_client_conns--;
+ goto idle_connection;
+ }
+ goto out;
+ }
+
+ _debug("pass chan 2");
+ rxrpc_activate_one_channel(conn, channel);
+ goto out;
+
+ case RXRPC_CONN_CLIENT_CULLED:
+ rxrpc_deactivate_one_channel(conn, channel);
+ ASSERT(list_empty(&conn->waiting_calls));
+ if (!conn->active_chans)
+ goto idle_connection;
+ goto out;
+
+ case RXRPC_CONN_CLIENT_WAITING:
+ rxrpc_deactivate_one_channel(conn, channel);
+ goto out;
+
+ default:
+ BUG();
+ }
-interrupted:
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
+out:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+out_2:
+ spin_unlock(&conn->channel_lock);
rxrpc_put_connection(conn);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
+ _leave("");
+ return;
+
+idle_connection:
+ /* As no channels remain active, the connection gets deactivated
+ * immediately or moved to the idle list for a short while.
+ */
+ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+ _debug("make idle");
+ conn->idle_timestamp = jiffies;
+ conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
+ list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
+ if (rxrpc_idle_client_conns.next == &conn->cache_link &&
+ !rxrpc_kill_all_client_conns)
+ queue_delayed_work(rxrpc_workqueue,
+ &rxrpc_client_conn_reap,
+ rxrpc_conn_idle_client_expiry);
+ } else {
+ _debug("make inactive");
+ conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+ list_del_init(&conn->cache_link);
+ }
+ goto out;
}
/*
- * Remove a client connection from the local endpoint's tree, thereby removing
- * it as a target for reuse for new client calls.
+ * Clean up a dead client connection.
*/
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+static struct rxrpc_connection *
+rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
{
+ struct rxrpc_connection *next;
struct rxrpc_local *local = conn->params.local;
+ unsigned int nr_conns;
- spin_lock(&local->client_conns_lock);
- if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
- rb_erase(&conn->client_node, &local->client_conns);
- spin_unlock(&local->client_conns_lock);
+ if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) {
+ spin_lock(&local->client_conns_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS,
+ &conn->flags))
+ rb_erase(&conn->client_node, &local->client_conns);
+ spin_unlock(&local->client_conns_lock);
+ }
rxrpc_put_client_connection_id(conn);
+
+ ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE);
+
+ if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags))
+ return NULL;
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ nr_conns = --rxrpc_nr_client_conns;
+
+ next = NULL;
+ if (nr_conns < rxrpc_max_client_connections &&
+ !list_empty(&rxrpc_waiting_client_conns)) {
+ next = list_entry(rxrpc_waiting_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ rxrpc_get_connection(next);
+ rxrpc_activate_conn(next);
+ }
+
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ rxrpc_kill_connection(conn);
+
+ if (next)
+ rxrpc_activate_channels(next);
+
+ /* We need to get rid of the temporary ref we took upon next, but we
+ * can't call rxrpc_put_connection() recursively.
+ */
+ return next;
+}
+
+/*
+ * Clean up a dead client connections.
+ */
+void rxrpc_put_client_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *next;
+
+ do {
+ _enter("%p{u=%d,d=%d}",
+ conn, atomic_read(&conn->usage), conn->debug_id);
+
+ next = rxrpc_put_one_client_conn(conn);
+
+ if (!next)
+ break;
+ conn = next;
+ } while (atomic_dec_and_test(&conn->usage));
+
+ _leave("");
+}
+
+/*
+ * Kill the longest-active client connections to make room for new ones.
+ */
+static void rxrpc_cull_active_client_conns(void)
+{
+ struct rxrpc_connection *conn;
+ unsigned int nr_conns = rxrpc_nr_client_conns;
+ unsigned int nr_active, limit;
+
+ _enter("");
+
+ ASSERTCMP(nr_conns, >=, 0);
+ if (nr_conns < rxrpc_max_client_connections) {
+ _leave(" [ok]");
+ return;
+ }
+ limit = rxrpc_reap_client_connections;
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ nr_active = rxrpc_nr_active_client_conns;
+
+ while (nr_active > limit) {
+ ASSERT(!list_empty(&rxrpc_active_client_conns));
+ conn = list_entry(rxrpc_active_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+
+ if (list_empty(&conn->waiting_calls)) {
+ conn->cache_state = RXRPC_CONN_CLIENT_CULLED;
+ list_del_init(&conn->cache_link);
+ } else {
+ conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+ list_move_tail(&conn->cache_link,
+ &rxrpc_waiting_client_conns);
+ }
+
+ nr_active--;
+ }
+
+ rxrpc_nr_active_client_conns = nr_active;
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ ASSERTCMP(nr_active, >=, 0);
+ _leave(" [culled]");
+}
+
+/*
+ * Discard expired client connections from the idle list. Each conn in the
+ * idle list has been exposed and holds an extra ref because of that.
+ *
+ * This may be called from conn setup or from a work item so cannot be
+ * considered non-reentrant.
+ */
+static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+{
+ struct rxrpc_connection *conn;
+ unsigned long expiry, conn_expires_at, now;
+ unsigned int nr_conns;
+ bool did_discard = false;
+
+ _enter("%c", work ? 'w' : 'n');
+
+ if (list_empty(&rxrpc_idle_client_conns)) {
+ _leave(" [empty]");
+ return;
+ }
+
+ /* Don't double up on the discarding */
+ if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+ _leave(" [already]");
+ return;
+ }
+
+ /* We keep an estimate of what the number of conns ought to be after
+ * we've discarded some so that we don't overdo the discarding.
+ */
+ nr_conns = rxrpc_nr_client_conns;
+
+next:
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ if (list_empty(&rxrpc_idle_client_conns))
+ goto out;
+
+ conn = list_entry(rxrpc_idle_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
+
+ if (!rxrpc_kill_all_client_conns) {
+ /* If the number of connections is over the reap limit, we
+ * expedite discard by reducing the expiry timeout. We must,
+ * however, have at least a short grace period to be able to do
+ * final-ACK or ABORT retransmission.
+ */
+ expiry = rxrpc_conn_idle_client_expiry;
+ if (nr_conns > rxrpc_reap_client_connections)
+ expiry = rxrpc_conn_idle_client_fast_expiry;
+
+ conn_expires_at = conn->idle_timestamp + expiry;
+
+ now = READ_ONCE(jiffies);
+ if (time_after(conn_expires_at, now))
+ goto not_yet_expired;
+ }
+
+ _debug("discard conn %d", conn->debug_id);
+ if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+ BUG();
+ conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+ list_del_init(&conn->cache_link);
+
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+
+ /* When we cleared the EXPOSED flag, we took on responsibility for the
+ * reference that that had on the usage count. We deal with that here.
+ * If someone re-sets the flag and re-gets the ref, that's fine.
+ */
+ rxrpc_put_connection(conn);
+ did_discard = true;
+ nr_conns--;
+ goto next;
+
+not_yet_expired:
+ /* The connection at the front of the queue hasn't yet expired, so
+ * schedule the work item for that point if we discarded something.
+ *
+ * We don't worry if the work item is already scheduled - it can look
+ * after rescheduling itself at a later time. We could cancel it, but
+ * then things get messier.
+ */
+ _debug("not yet");
+ if (!rxrpc_kill_all_client_conns)
+ queue_delayed_work(rxrpc_workqueue,
+ &rxrpc_client_conn_reap,
+ conn_expires_at - now);
+
+out:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxrpc_client_conn_discard_mutex);
+ _leave("");
+}
+
+/*
+ * Preemptively destroy all the client connection records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_client_connections(void)
+{
+ _enter("");
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ rxrpc_kill_all_client_conns = true;
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+
+ cancel_delayed_work(&rxrpc_client_conn_reap);
+
+ if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+ _debug("destroy: queue failed");
+
+ _leave("");
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index cee0f35bc1cf..6296374df840 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -25,6 +25,114 @@
#include "ar-internal.h"
/*
+ * Retransmit terminal ACK or ABORT of the previous call.
+ */
+static void rxrpc_conn_retransmit(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_channel *chan;
+ struct msghdr msg;
+ struct kvec iov;
+ struct {
+ struct rxrpc_wire_header whdr;
+ union {
+ struct {
+ __be32 code;
+ } abort;
+ struct {
+ struct rxrpc_ackpacket ack;
+ u8 padding[3];
+ struct rxrpc_ackinfo info;
+ };
+ };
+ } __attribute__((packed)) pkt;
+ size_t len;
+ u32 serial, mtu, call_id;
+
+ _enter("%d", conn->debug_id);
+
+ chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+
+ /* If the last call got moved on whilst we were waiting to run, just
+ * ignore this packet.
+ */
+ call_id = READ_ONCE(chan->last_call);
+ /* Sync with __rxrpc_disconnect_call() */
+ smp_rmb();
+ if (call_id != sp->hdr.callNumber)
+ return;
+
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt.whdr.epoch = htonl(sp->hdr.epoch);
+ pkt.whdr.cid = htonl(sp->hdr.cid);
+ pkt.whdr.callNumber = htonl(sp->hdr.callNumber);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = chan->last_type;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = conn->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(chan->last_service_id);
+
+ len = sizeof(pkt.whdr);
+ switch (chan->last_type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ pkt.abort.code = htonl(chan->last_abort);
+ len += sizeof(pkt.abort);
+ break;
+
+ case RXRPC_PACKET_TYPE_ACK:
+ mtu = conn->params.peer->if_mtu;
+ mtu -= conn->params.peer->hdrsize;
+ pkt.ack.bufferSpace = 0;
+ pkt.ack.maxSkew = htons(skb->priority);
+ pkt.ack.firstPacket = htonl(chan->last_seq);
+ pkt.ack.previousPacket = htonl(chan->last_seq - 1);
+ pkt.ack.serial = htonl(sp->hdr.serial);
+ pkt.ack.reason = RXRPC_ACK_DUPLICATE;
+ pkt.ack.nAcks = 0;
+ pkt.info.rxMTU = htonl(rxrpc_rx_mtu);
+ pkt.info.maxMTU = htonl(mtu);
+ pkt.info.rwind = htonl(rxrpc_rx_window_size);
+ pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ len += sizeof(pkt.ack) + sizeof(pkt.info);
+ break;
+ }
+
+ /* Resync with __rxrpc_disconnect_call() and check that the last call
+ * didn't get advanced whilst we were filling out the packets.
+ */
+ smp_rmb();
+ if (READ_ONCE(chan->last_call) != call_id)
+ return;
+
+ iov.iov_base = &pkt;
+ iov.iov_len = len;
+
+ serial = atomic_inc_return(&conn->serial);
+ pkt.whdr.serial = htonl(serial);
+
+ switch (chan->last_type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
+ break;
+ case RXRPC_PACKET_TYPE_ACK:
+ _proto("Tx ACK %%%u [re]", serial);
+ break;
+ }
+
+ kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+ _leave("");
+ return;
+}
+
+/*
* pass a connection-level abort onto all calls on that connection
*/
static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
@@ -166,6 +274,12 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ case RXRPC_PACKET_TYPE_ACK:
+ rxrpc_conn_retransmit(conn, skb);
+ rxrpc_free_skb(skb);
+ return 0;
+
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
return -EPROTO;
@@ -277,6 +391,7 @@ void rxrpc_process_connection(struct work_struct *work)
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
+ rxrpc_see_skb(skb);
ret = rxrpc_process_event(conn, skb, &abort_code);
switch (ret) {
case -EPROTO:
@@ -365,6 +480,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
whdr.type = RXRPC_PACKET_TYPE_ABORT;
while ((skb = skb_dequeue(&local->reject_queue))) {
+ rxrpc_see_skb(skb);
sp = rxrpc_skb(skb);
switch (sa.sa.sa_family) {
case AF_INET:
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 896d84493a05..5b45b6c367e7 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -1,6 +1,6 @@
-/* RxRPC virtual connection handler
+/* RxRPC virtual connection handler, common bits.
*
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -15,8 +15,6 @@
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
#include "ar-internal.h"
/*
@@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60;
static void rxrpc_connection_reaper(struct work_struct *work);
LIST_HEAD(rxrpc_connections);
+LIST_HEAD(rxrpc_connection_proc_list);
DEFINE_RWLOCK(rxrpc_connection_lock);
static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+static void rxrpc_destroy_connection(struct rcu_head *);
+
/*
* allocate a new connection
*/
@@ -41,21 +42,19 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
if (conn) {
+ INIT_LIST_HEAD(&conn->cache_link);
spin_lock_init(&conn->channel_lock);
- init_waitqueue_head(&conn->channel_wq);
+ INIT_LIST_HEAD(&conn->waiting_calls);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
skb_queue_head_init(&conn->rx_queue);
conn->security = &rxrpc_no_security;
spin_lock_init(&conn->state_lock);
- /* We maintain an extra ref on the connection whilst it is
- * on the rxrpc_connections list.
- */
- atomic_set(&conn->usage, 2);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
conn->size_align = 4;
conn->header_size = sizeof(struct rxrpc_wire_header);
+ conn->idle_timestamp = jiffies;
}
_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -153,25 +152,32 @@ not_found:
* terminates. The caller must hold the channel_lock and must release the
* call's ref on the connection.
*/
-void __rxrpc_disconnect_call(struct rxrpc_call *call)
+void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_channel *chan = &conn->channels[call->channel];
+ struct rxrpc_channel *chan =
+ &conn->channels[call->cid & RXRPC_CHANNELMASK];
- _enter("%d,%d", conn->debug_id, call->channel);
+ _enter("%d,%x", conn->debug_id, call->cid);
if (rcu_access_pointer(chan->call) == call) {
/* Save the result of the call so that we can repeat it if necessary
* through the channel, whilst disposing of the actual call record.
*/
- chan->last_result = call->local_abort;
+ chan->last_service_id = call->service_id;
+ if (call->local_abort) {
+ chan->last_abort = call->local_abort;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ } else {
+ chan->last_seq = call->rx_data_eaten;
+ chan->last_type = RXRPC_PACKET_TYPE_ACK;
+ }
+ /* Sync with rxrpc_conn_retransmit(). */
smp_wmb();
chan->last_call = chan->call_id;
chan->call_id = chan->call_counter;
rcu_assign_pointer(chan->call, NULL);
- atomic_inc(&conn->avail_chans);
- wake_up(&conn->channel_wq);
}
_leave("");
@@ -185,34 +191,52 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ if (rxrpc_is_client_call(call))
+ return rxrpc_disconnect_client_call(call);
+
spin_lock(&conn->channel_lock);
- __rxrpc_disconnect_call(call);
+ __rxrpc_disconnect_call(conn, call);
spin_unlock(&conn->channel_lock);
call->conn = NULL;
+ conn->idle_timestamp = jiffies;
rxrpc_put_connection(conn);
}
/*
- * release a virtual connection
+ * Kill off a connection.
*/
-void rxrpc_put_connection(struct rxrpc_connection *conn)
+void rxrpc_kill_connection(struct rxrpc_connection *conn)
{
- if (!conn)
- return;
+ ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+ !rcu_access_pointer(conn->channels[1].call) &&
+ !rcu_access_pointer(conn->channels[2].call) &&
+ !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(list_empty(&conn->cache_link));
- _enter("%p{u=%d,d=%d}",
- conn, atomic_read(&conn->usage), conn->debug_id);
+ write_lock(&rxrpc_connection_lock);
+ list_del_init(&conn->proc_link);
+ write_unlock(&rxrpc_connection_lock);
- ASSERTCMP(atomic_read(&conn->usage), >, 1);
+ /* Drain the Rx queue. Note that even though we've unpublished, an
+ * incoming packet could still be being added to our Rx queue, so we
+ * will need to drain it again in the RCU cleanup handler.
+ */
+ rxrpc_purge_queue(&conn->rx_queue);
- conn->put_time = ktime_get_seconds();
- if (atomic_dec_return(&conn->usage) == 1) {
- _debug("zombie");
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
- }
+ /* Leave final destruction to RCU. The connection processor work item
+ * must carry a ref on the connection to prevent us getting here whilst
+ * it is queued or running.
+ */
+ call_rcu(&conn->rcu, rxrpc_destroy_connection);
+}
- _leave("");
+/*
+ * release a virtual connection
+ */
+void __rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
}
/*
@@ -242,19 +266,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
}
/*
- * reap dead connections
+ * reap dead service connections
*/
static void rxrpc_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
- unsigned long reap_older_than, earliest, put_time, now;
+ unsigned long reap_older_than, earliest, idle_timestamp, now;
LIST_HEAD(graveyard);
_enter("");
- now = ktime_get_seconds();
- reap_older_than = now - rxrpc_connection_expiry;
+ now = jiffies;
+ reap_older_than = now - rxrpc_connection_expiry * HZ;
earliest = ULONG_MAX;
write_lock(&rxrpc_connection_lock);
@@ -263,10 +287,14 @@ static void rxrpc_connection_reaper(struct work_struct *work)
if (likely(atomic_read(&conn->usage) > 1))
continue;
- put_time = READ_ONCE(conn->put_time);
- if (time_after(put_time, reap_older_than)) {
- if (time_before(put_time, earliest))
- earliest = put_time;
+ idle_timestamp = READ_ONCE(conn->idle_timestamp);
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long)reap_older_than - (long)idle_timestamp);
+
+ if (time_after(idle_timestamp, reap_older_than)) {
+ if (time_before(idle_timestamp, earliest))
+ earliest = idle_timestamp;
continue;
}
@@ -277,7 +305,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
continue;
if (rxrpc_conn_is_client(conn))
- rxrpc_unpublish_client_conn(conn);
+ BUG();
else
rxrpc_unpublish_service_conn(conn);
@@ -287,9 +315,9 @@ static void rxrpc_connection_reaper(struct work_struct *work)
if (earliest != ULONG_MAX) {
_debug("reschedule reaper %ld", (long) earliest - now);
- ASSERTCMP(earliest, >, now);
+ ASSERT(time_after(earliest, now));
rxrpc_queue_delayed_work(&rxrpc_connection_reap,
- (earliest - now) * HZ);
+ earliest - now);
}
while (!list_empty(&graveyard)) {
@@ -298,16 +326,15 @@ static void rxrpc_connection_reaper(struct work_struct *work)
list_del_init(&conn->link);
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- skb_queue_purge(&conn->rx_queue);
- call_rcu(&conn->rcu, rxrpc_destroy_connection);
+ rxrpc_kill_connection(conn);
}
_leave("");
}
/*
- * preemptively destroy all the connection records rather than waiting for them
- * to time out
+ * preemptively destroy all the service connection records rather than
+ * waiting for them to time out
*/
void __exit rxrpc_destroy_all_connections(void)
{
@@ -316,6 +343,8 @@ void __exit rxrpc_destroy_all_connections(void)
_enter("");
+ rxrpc_destroy_all_client_connections();
+
rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -330,6 +359,8 @@ void __exit rxrpc_destroy_all_connections(void)
write_unlock(&rxrpc_connection_lock);
BUG_ON(leak);
+ ASSERT(list_empty(&rxrpc_connection_proc_list));
+
/* Make sure the local and peer records pinned by any dying connections
* are released.
*/
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index fd9027ccba8f..316a92107fee 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -185,8 +185,14 @@ struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
rxrpc_get_local(local);
+ /* We maintain an extra ref on the connection whilst it is on
+ * the rxrpc_connections list.
+ */
+ atomic_set(&conn->usage, 2);
+
write_lock(&rxrpc_connection_lock);
list_add_tail(&conn->link, &rxrpc_connections);
+ list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
write_unlock(&rxrpc_connection_lock);
/* Make the connection a target for incoming packets. */
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 70bb77818dea..5e683dd21ab9 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -125,6 +125,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
bool terminal;
int ret, ackbit, ack;
u32 serial;
+ u16 skew;
u8 flags;
_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
@@ -133,6 +134,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
ASSERTCMP(sp->call, ==, NULL);
flags = sp->hdr.flags;
serial = sp->hdr.serial;
+ skew = skb->priority;
spin_lock(&call->lock);
@@ -231,7 +233,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
spin_unlock(&call->lock);
atomic_inc(&call->ackr_not_idle);
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false);
_leave(" = 0 [posted]");
return 0;
@@ -244,7 +246,7 @@ out:
discard_and_ack:
_debug("discard and ACK packet %p", skb);
- __rxrpc_propose_ACK(call, ack, serial, true);
+ __rxrpc_propose_ACK(call, ack, skew, serial, true);
discard:
spin_unlock(&call->lock);
rxrpc_free_skb(skb);
@@ -252,7 +254,7 @@ discard:
return 0;
enqueue_and_ack:
- __rxrpc_propose_ACK(call, ack, serial, true);
+ __rxrpc_propose_ACK(call, ack, skew, serial, true);
enqueue_packet:
_net("defer skb %p", skb);
spin_unlock(&call->lock);
@@ -304,7 +306,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
__be32 wtmp;
- u32 hi_serial, abort_code;
+ u32 abort_code;
_enter("%p,%p", call, skb);
@@ -321,18 +323,12 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
}
#endif
- /* track the latest serial number on this connection for ACK packet
- * information */
- hi_serial = atomic_read(&call->conn->hi_serial);
- while (sp->hdr.serial > hi_serial)
- hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
- sp->hdr.serial);
-
/* request ACK generation for any ACK or DATA packet that requests
* it */
if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
_proto("ACK Requested on %%%u", sp->hdr.serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
+ skb->priority, sp->hdr.serial, false);
}
switch (sp->hdr.type) {
@@ -570,7 +566,8 @@ done:
/*
* post connection-level events to the connection
- * - this includes challenges, responses and some aborts
+ * - this includes challenges, responses, some aborts and call terminal packet
+ * retransmission.
*/
static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
@@ -637,7 +634,7 @@ void rxrpc_data_ready(struct sock *sk)
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local = sk->sk_user_data;
struct sk_buff *skb;
- int ret;
+ int ret, skew;
_enter("%p", sk);
@@ -700,25 +697,64 @@ void rxrpc_data_ready(struct sock *sk)
rcu_read_lock();
conn = rxrpc_find_connection_rcu(local, skb);
- if (!conn)
+ if (!conn) {
+ skb->priority = 0;
goto cant_route_call;
+ }
+
+ /* Note the serial number skew here */
+ skew = (int)sp->hdr.serial - (int)conn->hi_serial;
+ if (skew >= 0) {
+ if (skew > 0)
+ conn->hi_serial = sp->hdr.serial;
+ skb->priority = 0;
+ } else {
+ skew = -skew;
+ skb->priority = min(skew, 65535);
+ }
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
rxrpc_post_packet_to_conn(conn, skb);
+ goto out_unlock;
} else {
/* Call-bound packets are routed by connection channel. */
unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
struct rxrpc_channel *chan = &conn->channels[channel];
- struct rxrpc_call *call = rcu_dereference(chan->call);
+ struct rxrpc_call *call;
+
+ /* Ignore really old calls */
+ if (sp->hdr.callNumber < chan->last_call)
+ goto discard_unlock;
+
+ if (sp->hdr.callNumber == chan->last_call) {
+ /* For the previous service call, if completed
+ * successfully, we discard all further packets.
+ */
+ if (rxrpc_conn_is_service(conn) &&
+ (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+ goto discard_unlock;
+
+ /* But otherwise we need to retransmit the final packet
+ * from data cached in the connection record.
+ */
+ rxrpc_post_packet_to_conn(conn, skb);
+ goto out_unlock;
+ }
+ call = rcu_dereference(chan->call);
if (!call || atomic_read(&call->usage) == 0)
goto cant_route_call;
rxrpc_post_packet_to_call(call, skb);
+ goto out_unlock;
}
+discard_unlock:
+ rxrpc_free_skb(skb);
+out_unlock:
rcu_read_unlock();
out:
return;
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 31a3f86ef2f6..bcc6593b4cdb 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -93,6 +93,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
if (skb) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_see_skb(skb);
_debug("{%d},{%u}", local->debug_id, sp->hdr.type);
switch (sp->hdr.type) {
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f4bda06b7d2d..8a9917cba6fe 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -218,11 +218,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = 0;
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (!call->in_clientflag &&
+ } else if (rxrpc_is_client_call(call) &&
call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
/* request phase complete for this client call */
ret = -EPROTO;
- } else if (call->in_clientflag &&
+ } else if (rxrpc_is_service_call(call) &&
call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
/* Reply phase not begun or not complete for service call. */
@@ -390,7 +390,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
call->acks_winsz),
*timeo);
- add_wait_queue(&call->tx_waitq, &myself);
+ add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -408,7 +408,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
lock_sock(&rx->sk);
}
- remove_wait_queue(&call->tx_waitq, &myself);
+ remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
_leave(" = %d", ret);
return ret;
@@ -482,6 +482,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
/* the packet may be freed by rxrpc_process_call() before this
* returns */
+ if (rxrpc_is_client_call(call))
+ rxrpc_expose_client_call(call);
ret = rxrpc_send_data_packet(call->conn, skb);
_net("sent skb %p", skb);
} else {
@@ -548,6 +550,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
skb = call->tx_pending;
call->tx_pending = NULL;
+ rxrpc_see_skb(skb);
copied = 0;
do {
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index ced5f07444e5..060fb4892c39 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -46,7 +46,9 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
- struct rxrpc_connection *conn;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
@@ -60,15 +62,24 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call = list_entry(v, struct rxrpc_call, link);
- sprintf(lbuff, "%pI4:%u",
- &call->local->srx.transport.sin.sin_addr,
- ntohs(call->local->srx.transport.sin.sin_port));
+ rx = READ_ONCE(call->socket);
+ if (rx) {
+ local = READ_ONCE(rx->local);
+ if (local)
+ sprintf(lbuff, "%pI4:%u",
+ &local->srx.transport.sin.sin_addr,
+ ntohs(local->srx.transport.sin.sin_port));
+ else
+ strcpy(lbuff, "no_local");
+ } else {
+ strcpy(lbuff, "no_socket");
+ }
- conn = call->conn;
- if (conn)
+ peer = call->peer;
+ if (peer)
sprintf(rbuff, "%pI4:%u",
- &conn->params.peer->srx.transport.sin.sin_addr,
- ntohs(conn->params.peer->srx.transport.sin.sin_port));
+ &peer->srx.transport.sin.sin_addr,
+ ntohs(peer->srx.transport.sin.sin_port));
else
strcpy(rbuff, "no_connection");
@@ -80,7 +91,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->service_id,
call->cid,
call->call_id,
- call->in_clientflag ? "Svc" : "Clt",
+ rxrpc_is_service_call(call) ? "Svc" : "Clt",
atomic_read(&call->usage),
rxrpc_call_states[call->state],
call->remote_abort ?: call->local_abort,
@@ -115,13 +126,13 @@ const struct file_operations rxrpc_call_seq_fops = {
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
{
read_lock(&rxrpc_connection_lock);
- return seq_list_start_head(&rxrpc_connections, *_pos);
+ return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
}
static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
loff_t *pos)
{
- return seq_list_next(v, &rxrpc_connections, pos);
+ return seq_list_next(v, &rxrpc_connection_proc_list, pos);
}
static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
@@ -134,7 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
struct rxrpc_connection *conn;
char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
- if (v == &rxrpc_connections) {
+ if (v == &rxrpc_connection_proc_list) {
seq_puts(seq,
"Proto Local Remote "
" SvID ConnID End Use State Key "
@@ -143,7 +154,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
return 0;
}
- conn = list_entry(v, struct rxrpc_connection, link);
+ conn = list_entry(v, struct rxrpc_connection, proc_link);
sprintf(lbuff, "%pI4:%u",
&conn->params.local->srx.transport.sin.sin_addr,
@@ -165,7 +176,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
- atomic_read(&conn->hi_serial));
+ conn->hi_serial);
return 0;
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9ed66d533002..b964c2d49a88 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -111,6 +111,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
}
peek_next_packet:
+ rxrpc_see_skb(skb);
sp = rxrpc_skb(skb);
call = sp->call;
ASSERT(call != NULL);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 63afa9e9cc08..89f475febfd7 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -275,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = call->channel << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
call->crypto_buf[0] = htonl(sp->hdr.callNumber);
call->crypto_buf[1] = htonl(x);
@@ -507,7 +507,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call,
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
/* validate the security checksum */
- x = call->channel << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
call->crypto_buf[0] = htonl(call->call_id);
call->crypto_buf[1] = htonl(x);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 06c51d4b622d..fbd8c74d9505 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -53,9 +53,9 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call)
/*
* drop the bottom ACK off of the call ACK window and advance the window
*/
-static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
- struct rxrpc_skb_priv *sp)
+static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb)
{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
int loop;
u32 seq;
@@ -91,8 +91,8 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
* its Tx bufferage.
*/
_debug("send Rx idle ACK");
- __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
- false);
+ __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE,
+ skb->priority, sp->hdr.serial, false);
}
spin_unlock_bh(&call->lock);
@@ -125,7 +125,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
call->rx_data_recv = sp->hdr.seq;
- rxrpc_hard_ACK_data(call, sp);
+ rxrpc_hard_ACK_data(call, skb);
}
EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
@@ -163,3 +163,65 @@ void rxrpc_kernel_free_skb(struct sk_buff *skb)
rxrpc_free_skb(skb);
}
EXPORT_SYMBOL(rxrpc_kernel_free_skb);
+
+/*
+ * Note the existence of a new-to-us socket buffer (allocated or dequeued).
+ */
+void rxrpc_new_skb(struct sk_buff *skb)
+{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(&rxrpc_n_skbs);
+ trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here);
+}
+
+/*
+ * Note the re-emergence of a socket buffer from a queue or buffer.
+ */
+void rxrpc_see_skb(struct sk_buff *skb)
+{
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n = atomic_read(&rxrpc_n_skbs);
+ trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here);
+ }
+}
+
+/*
+ * Note the addition of a ref on a socket buffer.
+ */
+void rxrpc_get_skb(struct sk_buff *skb)
+{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(&rxrpc_n_skbs);
+ trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here);
+ skb_get(skb);
+}
+
+/*
+ * Note the destruction of a socket buffer.
+ */
+void rxrpc_free_skb(struct sk_buff *skb)
+{
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n;
+ CHECK_SLAB_OKAY(&skb->users);
+ n = atomic_dec_return(&rxrpc_n_skbs);
+ trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
+ }
+}
+
+/*
+ * Clear a queue of socket buffers.
+ */
+void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+ const void *here = __builtin_return_address(0);
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue((list))) != NULL) {
+ int n = atomic_dec_return(&rxrpc_n_skbs);
+ trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
+ }
+}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 03ad08774d4e..dc380af8a81e 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -62,6 +62,22 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.proc_handler = proc_dointvec_ms_jiffies,
.extra1 = (void *)&one,
},
+ {
+ .procname = "idle_conn_expiry",
+ .data = &rxrpc_conn_idle_client_expiry,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "idle_conn_fast_expiry",
+ .data = &rxrpc_conn_idle_client_fast_expiry,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
/* Values measured in seconds but used in jiffies */
{
@@ -81,17 +97,24 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)&one,
},
- /* Values measured in seconds */
+ /* Non-time values */
+ {
+ .procname = "max_client_conns",
+ .data = &rxrpc_max_client_connections,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&rxrpc_reap_client_connections,
+ },
{
- .procname = "connection_expiry",
- .data = &rxrpc_connection_expiry,
+ .procname = "reap_client_conns",
+ .data = &rxrpc_reap_client_connections,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&one,
+ .extra2 = (void *)&rxrpc_max_client_connections,
},
-
- /* Non-time values */
{
.procname = "max_backlog",
.data = &rxrpc_max_backlog,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 691409de3e1a..59a8d3150ae2 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
goto drop;
break;
case TCA_VLAN_ACT_PUSH:
- err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid);
+ err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
+ (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
if (err)
goto drop;
break;
@@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
[TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) },
[TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 },
[TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 },
+ [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 },
};
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
@@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
int action;
__be16 push_vid = 0;
__be16 push_proto = 0;
+ u8 push_prio = 0;
bool exists = false;
int ret = 0, err;
@@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
} else {
push_proto = htons(ETH_P_8021Q);
}
+
+ if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+ push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
break;
default:
if (exists)
@@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
v->tcfv_action = action;
v->tcfv_push_vid = push_vid;
+ v->tcfv_push_prio = push_prio;
v->tcfv_push_proto = push_proto;
v->tcf_action = parm->action;
@@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
- v->tcfv_push_proto)))
+ v->tcfv_push_proto) ||
+ (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
+ v->tcfv_push_prio))))
goto nla_put_failure;
tcf_tm_dump(&t, &v->tcf_tm);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0b8c3ace671f..eb219b78cd49 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct tcf_exts e;
struct tcf_ematch_tree t;
- tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ goto errout;
err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
if (err < 0)
@@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
return -ENOBUFS;
- tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ if (err < 0)
+ goto errout;
+
err = -EINVAL;
if (handle) {
fnew->handle = handle;
@@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c3002c2c68bb..4742f415ee5b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -311,17 +311,19 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
if (ret < 0)
return ret;
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ if (ret < 0)
+ goto errout;
if (tb[TCA_BPF_FLAGS]) {
u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
- tcf_exts_destroy(&exts);
- return -EINVAL;
+ ret = -EINVAL;
+ goto errout;
}
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
@@ -331,10 +333,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
- if (ret < 0) {
- tcf_exts_destroy(&exts);
- return ret;
- }
+ if (ret < 0)
+ goto errout;
if (tb[TCA_BPF_CLASSID]) {
prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
@@ -343,6 +343,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
tcf_exts_change(tp, &prog->exts, &exts);
return 0;
+
+errout:
+ tcf_exts_destroy(&exts);
+ return ret;
}
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -388,7 +392,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (!prog)
return -ENOBUFS;
- tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ if (ret < 0)
+ goto errout;
if (oldprog) {
if (handle && oldprog->handle != handle) {
@@ -420,9 +426,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) prog;
return 0;
+
errout:
+ tcf_exts_destroy(&prog->exts);
kfree(prog);
-
return ret;
}
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 4c85bd3a750c..85233c470035 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ if (err < 0)
+ goto errout;
new->handle = handle;
new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
@@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
if (err < 0)
goto errout;
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ if (err < 0) {
+ tcf_exts_destroy(&e);
+ goto errout;
+ }
err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
if (err < 0) {
@@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
return 0;
errout:
+ tcf_exts_destroy(&new->exts);
kfree(new);
return err;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index fbfec6a18839..2c1ae549edbf 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -418,10 +418,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
}
- tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ if (err < 0)
+ goto err1;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
- return err;
+ goto err1;
err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
if (err < 0)
@@ -432,13 +434,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
goto err2;
- tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ if (err < 0)
+ goto err3;
fold = (struct flow_filter *)*arg;
if (fold) {
err = -EINVAL;
if (fold->handle != handle && handle)
- goto err2;
+ goto err3;
/* Copy fold into fnew */
fnew->tp = fold->tp;
@@ -458,31 +462,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err2;
+ goto err3;
if (mode == FLOW_MODE_HASH)
perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err2;
+ goto err3;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
} else {
err = -EINVAL;
if (!handle)
- goto err2;
+ goto err3;
if (!tb[TCA_FLOW_KEYS])
- goto err2;
+ goto err3;
mode = FLOW_MODE_MAP;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err2;
+ goto err3;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err2;
+ goto err3;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
@@ -542,6 +546,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
call_rcu(&fold->rcu, flow_destroy_filter);
return 0;
+err3:
+ tcf_exts_destroy(&fnew->exts);
err2:
tcf_em_tree_destroy(&t);
kfree(fnew);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5060801a2f6d..cf9ad5b50889 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -28,6 +28,7 @@ struct fl_flow_key {
struct flow_dissector_key_control control;
struct flow_dissector_key_basic basic;
struct flow_dissector_key_eth_addrs eth;
+ struct flow_dissector_key_vlan vlan;
struct flow_dissector_key_addrs ipaddrs;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
@@ -293,6 +294,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 },
+
};
static void fl_set_key_val(struct nlattr **tb,
@@ -308,9 +313,29 @@ static void fl_set_key_val(struct nlattr **tb,
memcpy(mask, nla_data(tb[mask_type]), len);
}
+static void fl_set_key_vlan(struct nlattr **tb,
+ struct flow_dissector_key_vlan *key_val,
+ struct flow_dissector_key_vlan *key_mask)
+{
+#define VLAN_PRIORITY_MASK 0x7
+
+ if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+ key_val->vlan_id =
+ nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+ key_mask->vlan_id = VLAN_VID_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+ key_val->vlan_priority =
+ nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+ VLAN_PRIORITY_MASK;
+ key_mask->vlan_priority = VLAN_PRIORITY_MASK;
+ }
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
+ __be16 ethertype;
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FLOWER_INDEV]) {
int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -328,9 +353,20 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
- fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
- &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
- sizeof(key->basic.n_proto));
+ if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
+ ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
+
+ if (ethertype == htons(ETH_P_8021Q)) {
+ fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
+ fl_set_key_val(tb, &key->basic.n_proto,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
+ } else {
+ key->basic.n_proto = ethertype;
+ mask->basic.n_proto = cpu_to_be16(~0);
+ }
+ }
if (key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) {
@@ -404,12 +440,10 @@ static int fl_init_hashtable(struct cls_fl_head *head,
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
-#define FL_KEY_MEMBER_END_OFFSET(member) \
- (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
-#define FL_KEY_IN_RANGE(mask, member) \
- (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
- FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+#define FL_KEY_IS_MASKED(mask, member) \
+ memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
+ 0, FL_KEY_MEMBER_SIZE(member)) \
#define FL_KEY_SET(keys, cnt, id, member) \
do { \
@@ -418,9 +452,9 @@ static int fl_init_hashtable(struct cls_fl_head *head,
cnt++; \
} while(0);
-#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \
do { \
- if (FL_KEY_IN_RANGE(mask, member)) \
+ if (FL_KEY_IS_MASKED(mask, member)) \
FL_KEY_SET(keys, cnt, id, member); \
} while(0);
@@ -432,14 +466,16 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_PORTS, tp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_VLAN, vlan);
skb_flow_dissector_init(&head->dissector, keys, cnt);
}
@@ -478,10 +514,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct tcf_exts e;
int err;
- tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ goto errout;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -550,7 +588,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
return -ENOBUFS;
- tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+ if (err < 0)
+ goto errout;
if (!handle) {
handle = fl_grab_new_handle(tp, head);
@@ -614,6 +654,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
@@ -668,6 +709,29 @@ static int fl_dump_key_val(struct sk_buff *skb,
return 0;
}
+static int fl_dump_key_vlan(struct sk_buff *skb,
+ struct flow_dissector_key_vlan *vlan_key,
+ struct flow_dissector_key_vlan *vlan_mask)
+{
+ int err;
+
+ if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
+ return 0;
+ if (vlan_mask->vlan_id) {
+ err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+ vlan_key->vlan_id);
+ if (err)
+ return err;
+ }
+ if (vlan_mask->vlan_priority) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+ vlan_key->vlan_priority);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -712,6 +776,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
&mask->basic.n_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
+
+ if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+ goto nla_put_failure;
+
if ((key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) &&
fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index f23a3b68bba6..cc0bda945800 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -195,10 +195,12 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
u32 mask;
int err;
- tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ if (err < 0)
+ goto errout;
if (tb[TCA_FW_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
#endif /* CONFIG_NET_CLS_IND */
fnew->tp = f->tp;
- tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ if (err < 0) {
+ kfree(fnew);
+ return err;
+ }
err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
if (err < 0) {
+ tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
@@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
return -ENOBUFS;
- tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ if (err < 0)
+ goto errout;
f->id = handle;
f->tp = tp;
@@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 08a3b0a6f5ab..c91e65d81a48 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -383,17 +383,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *est, int new,
bool ovr)
{
- int err;
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
unsigned int h1;
struct route4_bucket *b;
struct tcf_exts e;
+ int err;
- tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ goto errout;
err = -EINVAL;
if (tb[TCA_ROUTE4_TO]) {
@@ -503,7 +505,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (!f)
goto errout;
- tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ if (err < 0)
+ goto errout;
+
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
@@ -557,6 +562,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index f9c9fc075fe6..4f05a19fb073 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ if (err < 0)
+ goto errout2;
f = (struct rsvp_filter *)*arg;
if (f) {
@@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout2;
}
- tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ if (err < 0) {
+ kfree(n);
+ goto errout2;
+ }
if (tb[TCA_RSVP_CLASSID]) {
n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
@@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout2;
- tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ if (err < 0)
+ goto errout;
h2 = 16;
if (tb[TCA_RSVP_SRC]) {
memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -627,6 +635,7 @@ insert:
goto insert;
errout:
+ tcf_exts_destroy(&f->exts);
kfree(f);
errout2:
tcf_exts_destroy(&e);
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 944c8ff45055..d9500709831f 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
-static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r)
{
memset(r, 0, sizeof(*r));
- tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
static void __tcindex_partial_destroy(struct rcu_head *head)
@@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head)
kfree(p);
}
+static void tcindex_free_perfect_hash(struct tcindex_data *cp)
+{
+ int i;
+
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_destroy(&cp->perfect[i].exts);
+ kfree(cp->perfect);
+}
+
+static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+{
+ int i, err = 0;
+
+ cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
+ GFP_KERNEL);
+ if (!cp->perfect)
+ return -ENOMEM;
+
+ for (i = 0; i < cp->hash; i++) {
+ err = tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ if (err < 0)
+ goto errout;
+ }
+
+ return 0;
+
+errout:
+ tcindex_free_perfect_hash(cp);
+ return err;
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
- struct tcindex_data *cp, *oldp;
+ struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
+ int err, balloc = 0;
struct tcf_exts e;
- tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ goto errout;
err = -ENOMEM;
/* tcindex_data attributes must look atomic to classifier/lookup so
@@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (p->perfect) {
int i;
- cp->perfect = kmemdup(p->perfect,
- sizeof(*r) * cp->hash, GFP_KERNEL);
- if (!cp->perfect)
+ if (tcindex_alloc_perfect_hash(cp) < 0)
goto errout;
for (i = 0; i < cp->hash; i++)
- tcf_exts_init(&cp->perfect[i].exts,
- TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
cp->h = p->h;
- tcindex_filter_result_init(&new_filter_result);
- tcindex_filter_result_init(&cr);
+ err = tcindex_filter_result_init(&new_filter_result);
+ if (err < 0)
+ goto errout1;
+ err = tcindex_filter_result_init(&cr);
+ if (err < 0)
+ goto errout1;
if (old_r)
cr.res = r->res;
@@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp->perfect && !cp->h) {
if (valid_perfect_hash(cp)) {
- int i;
-
- cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
- if (!cp->perfect)
+ if (tcindex_alloc_perfect_hash(cp) < 0)
goto errout_alloc;
- for (i = 0; i < cp->hash; i++)
- tcf_exts_init(&cp->perfect[i].exts,
- TCA_TCINDEX_ACT,
- TCA_TCINDEX_POLICE);
balloc = 1;
} else {
struct tcindex_filter __rcu **hash;
@@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (!f)
goto errout_alloc;
f->key = handle;
- tcindex_filter_result_init(&f->result);
f->next = NULL;
+ err = tcindex_filter_result_init(&f->result);
+ if (err < 0) {
+ kfree(f);
+ goto errout_alloc;
+ }
}
if (tb[TCA_TCINDEX_CLASSID]) {
@@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
else
tcf_exts_change(tp, &cr.exts, &e);
- if (old_r && old_r != r)
- tcindex_filter_result_init(old_r);
+ if (old_r && old_r != r) {
+ err = tcindex_filter_result_init(old_r);
+ if (err < 0) {
+ kfree(f);
+ goto errout_alloc;
+ }
+ }
oldp = p;
r->res = cr.res;
@@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
errout_alloc:
if (balloc == 1)
- kfree(cp->perfect);
+ tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
+errout1:
+ tcf_exts_destroy(&cr.exts);
+ tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
tcf_exts_destroy(&e);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ffe593efe930..a29263a9d8c1 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -709,13 +709,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- int err;
struct tcf_exts e;
+ int err;
- tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
return err;
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ goto errout;
err = -EINVAL;
if (tb[TCA_U32_LINK]) {
@@ -833,7 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
new->tp = tp;
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+ kfree(new);
+ return NULL;
+ }
return new;
}
@@ -985,9 +990,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
n->flags = flags;
- tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
n->tp = tp;
+ err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ if (err < 0)
+ goto errout;
+
#ifdef CONFIG_CLS_U32_MARK
n->pcpu_success = alloc_percpu(u32);
if (!n->pcpu_success) {
@@ -1028,9 +1036,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
errhw:
#ifdef CONFIG_CLS_U32_MARK
free_percpu(n->pcpu_success);
-errout:
#endif
+errout:
+ tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
free_percpu(n->pf);
#endif
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 12ebde845523..d677b3484d81 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
+#include <linux/hashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -259,37 +260,40 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
struct Qdisc *q;
+ if (!qdisc_dev(root))
+ return (root->handle == handle ? root : NULL);
+
if (!(root->flags & TCQ_F_BUILTIN) &&
root->handle == handle)
return root;
- list_for_each_entry_rcu(q, &root->list, list) {
+ hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
if (q->handle == handle)
return q;
}
return NULL;
}
-void qdisc_list_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
struct Qdisc *root = qdisc_dev(q)->qdisc;
WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
- list_add_tail_rcu(&q->list, &root->list);
+ hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
}
}
-EXPORT_SYMBOL(qdisc_list_add);
+EXPORT_SYMBOL(qdisc_hash_add);
-void qdisc_list_del(struct Qdisc *q)
+void qdisc_hash_del(struct Qdisc *q)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
ASSERT_RTNL();
- list_del_rcu(&q->list);
+ hash_del_rcu(&q->hash);
}
}
-EXPORT_SYMBOL(qdisc_list_del);
+EXPORT_SYMBOL(qdisc_hash_del);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
@@ -998,7 +1002,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
goto err_out4;
}
- qdisc_list_add(sch);
+ qdisc_hash_add(sch);
return sch;
}
@@ -1431,10 +1435,11 @@ err_out:
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
- int *q_idx_p, int s_q_idx)
+ int *q_idx_p, int s_q_idx, bool recur)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;
+ int b;
if (!root)
return 0;
@@ -1449,7 +1454,17 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
goto done;
q_idx++;
}
- list_for_each_entry(q, &root->list, list) {
+
+ /* If dumping singletons, there is no qdisc_dev(root) and the singleton
+ * itself has already been dumped.
+ *
+ * If we've already dumped the top-level (ingress) qdisc above and the global
+ * qdisc hashtable, we don't want to hit it again
+ */
+ if (!qdisc_dev(root) || !recur)
+ goto out;
+
+ hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
if (q_idx < s_q_idx) {
q_idx++;
continue;
@@ -1490,13 +1505,13 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+ if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx) < 0)
+ &q_idx, s_q_idx, false) < 0)
goto done;
cont:
@@ -1765,6 +1780,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
int *t_p, int s_t)
{
struct Qdisc *q;
+ int b;
if (!root)
return 0;
@@ -1772,7 +1788,10 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;
- list_for_each_entry(q, &root->list, list) {
+ if (!qdisc_dev(root))
+ return 0;
+
+ hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
return -1;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 657c13362b19..0d21b567ff27 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = {
.dequeue = noop_dequeue,
.flags = TCQ_F_BUILTIN,
.ops = &noop_qdisc_ops,
- .list = LIST_HEAD_INIT(noop_qdisc.list),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
.running = SEQCNT_ZERO(noop_qdisc.running),
@@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p;
}
- INIT_LIST_HEAD(&sch->list);
skb_queue_head_init(&sch->q);
spin_lock_init(&sch->busylock);
@@ -701,7 +699,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
return;
#ifdef CONFIG_NET_SCHED
- qdisc_list_del(qdisc);
+ qdisc_hash_del(qdisc);
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
@@ -789,6 +787,10 @@ static void attach_default_qdiscs(struct net_device *dev)
qdisc->ops->attach(qdisc);
}
}
+#ifdef CONFIG_NET_SCHED
+ if (dev->qdisc)
+ qdisc_hash_add(dev->qdisc);
+#endif
}
static void transition_one_qdisc(struct net_device *dev,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ddc7bd74ecb..000f1d36128e 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -142,8 +142,6 @@ struct hfsc_class {
link-sharing, max(myf, cfmin) */
u64 cl_myf; /* my fit-time (calculated from this
class's own upperlimit curve) */
- u64 cl_myfadj; /* my fit-time adjustment (to cancel
- history dependence) */
u64 cl_cfmin; /* earliest children's fit-time (used
with cl_myf to obtain cl_f) */
u64 cl_cvtmin; /* minimal virtual time among the
@@ -151,11 +149,8 @@ struct hfsc_class {
(monotonic within a period) */
u64 cl_vtadj; /* intra-period cumulative vt
adjustment */
- u64 cl_vtoff; /* inter-period cumulative vt offset */
- u64 cl_cvtmax; /* max child's vt in the last period */
- u64 cl_cvtoff; /* cumulative cvtmax of all periods */
- u64 cl_pcvtoff; /* parent's cvtoff at initialization
- time */
+ u64 cl_cvtoff; /* largest virtual time seen among
+ the children */
struct internal_sc cl_rsc; /* internal real-time service curve */
struct internal_sc cl_fsc; /* internal fair service curve */
@@ -701,28 +696,16 @@ init_vf(struct hfsc_class *cl, unsigned int len)
} else {
/*
* first child for a new parent backlog period.
- * add parent's cvtmax to cvtoff to make a new
- * vt (vtoff + vt) larger than the vt in the
- * last period for all children.
+ * initialize cl_vt to the highest value seen
+ * among the siblings. this is analogous to
+ * what cur_time would provide in realtime case.
*/
- vt = cl->cl_parent->cl_cvtmax;
- cl->cl_parent->cl_cvtoff += vt;
- cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_vt = cl->cl_parent->cl_cvtoff;
cl->cl_parent->cl_cvtmin = 0;
- cl->cl_vt = 0;
}
- cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
- cl->cl_pcvtoff;
-
/* update the virtual curve */
- vt = cl->cl_vt + cl->cl_vtoff;
- rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
- cl->cl_total);
- if (cl->cl_virtual.x == vt) {
- cl->cl_virtual.x -= cl->cl_vtoff;
- cl->cl_vtoff = 0;
- }
+ rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
cl->cl_vtadj = 0;
cl->cl_vtperiod++; /* increment vt period */
@@ -745,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len)
/* compute myf */
cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
cl->cl_total);
- cl->cl_myfadj = 0;
}
}
@@ -779,8 +761,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
go_passive = 0;
/* update vt */
- cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
- - cl->cl_vtoff + cl->cl_vtadj;
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj;
/*
* if vt of the class is smaller than cvtmin,
@@ -795,9 +776,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
if (go_passive) {
/* no more active child, going passive */
- /* update cvtmax of the parent class */
- if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
- cl->cl_parent->cl_cvtmax = cl->cl_vt;
+ /* update cvtoff of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtoff)
+ cl->cl_parent->cl_cvtoff = cl->cl_vt;
/* remove this class from the vt tree */
vttree_remove(cl);
@@ -813,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
/* update f */
if (cl->cl_flags & HFSC_USC) {
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+#if 0
cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
cl->cl_total);
-#if 0
/*
* This code causes classes to stay way under their
* limit when multiple classes are used at gigabit
@@ -940,7 +922,7 @@ static void
hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
{
sc2isc(fsc, &cl->cl_fsc);
- rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
cl->cl_flags |= HFSC_FSC;
}
@@ -1094,7 +1076,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent->level == 0)
hfsc_purge_queue(sch, parent);
hfsc_adjust_levels(parent);
- cl->cl_pcvtoff = parent->cl_cvtoff;
sch_tree_unlock(sch);
qdisc_class_hash_grow(sch, &q->clhash);
@@ -1482,16 +1463,12 @@ hfsc_reset_class(struct hfsc_class *cl)
cl->cl_e = 0;
cl->cl_vt = 0;
cl->cl_vtadj = 0;
- cl->cl_vtoff = 0;
cl->cl_cvtmin = 0;
- cl->cl_cvtmax = 0;
cl->cl_cvtoff = 0;
- cl->cl_pcvtoff = 0;
cl->cl_vtperiod = 0;
cl->cl_parentperiod = 0;
cl->cl_f = 0;
cl->cl_myf = 0;
- cl->cl_myfadj = 0;
cl->cl_cfmin = 0;
cl->cl_nactive = 0;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index b9439827c172..2bc8d7f8df16 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc_destroy(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
- qdisc_list_add(qdisc);
+ qdisc_hash_add(qdisc);
#endif
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 549c66359924..b5c502c78143 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch)
if (old)
qdisc_destroy(old);
if (ntx < dev->real_num_tx_queues)
- qdisc_list_add(qdisc);
+ qdisc_hash_add(qdisc);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
new file mode 100644
index 000000000000..6cff3f6d0c3a
--- /dev/null
+++ b/net/strparser/Kconfig
@@ -0,0 +1,4 @@
+
+config STREAM_PARSER
+ tristate
+ default n
diff --git a/net/strparser/Makefile b/net/strparser/Makefile
new file mode 100644
index 000000000000..858a126ebaa0
--- /dev/null
+++ b/net/strparser/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_STREAM_PARSER) += strparser.o
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
new file mode 100644
index 000000000000..5c7549b5b92c
--- /dev/null
+++ b/net/strparser/strparser.c
@@ -0,0 +1,510 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+
+static struct workqueue_struct *strp_wq;
+
+struct _strp_rx_msg {
+ /* Internal cb structure. struct strp_rx_msg must be first for passing
+ * to upper layer.
+ */
+ struct strp_rx_msg strp;
+ int accum_len;
+ int early_eaten;
+};
+
+static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+{
+ return (struct _strp_rx_msg *)((void *)skb->cb +
+ offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Lower lock held */
+static void strp_abort_rx_strp(struct strparser *strp, int err)
+{
+ struct sock *csk = strp->sk;
+
+ /* Unrecoverable error in receive */
+
+ del_timer(&strp->rx_msg_timer);
+
+ if (strp->rx_stopped)
+ return;
+
+ strp->rx_stopped = 1;
+
+ /* Report an error on the lower socket */
+ csk->sk_err = err;
+ csk->sk_error_report(csk);
+}
+
+static void strp_start_rx_timer(struct strparser *strp)
+{
+ if (strp->sk->sk_rcvtimeo)
+ mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+}
+
+/* Lower lock held */
+static void strp_parser_err(struct strparser *strp, int err,
+ read_descriptor_t *desc)
+{
+ desc->error = err;
+ kfree_skb(strp->rx_skb_head);
+ strp->rx_skb_head = NULL;
+ strp->cb.abort_parser(strp, err);
+}
+
+static inline int strp_peek_len(struct strparser *strp)
+{
+ struct socket *sock = strp->sk->sk_socket;
+
+ return sock->ops->peek_len(sock);
+}
+
+/* Lower socket lock held */
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len)
+{
+ struct strparser *strp = (struct strparser *)desc->arg.data;
+ struct _strp_rx_msg *rxm;
+ struct sk_buff *head, *skb;
+ size_t eaten = 0, cand_len;
+ ssize_t extra;
+ int err;
+ bool cloned_orig = false;
+
+ if (strp->rx_paused)
+ return 0;
+
+ head = strp->rx_skb_head;
+ if (head) {
+ /* Message already in progress */
+
+ rxm = _strp_rx_msg(head);
+ if (unlikely(rxm->early_eaten)) {
+ /* Already some number of bytes on the receive sock
+ * data saved in rx_skb_head, just indicate they
+ * are consumed.
+ */
+ eaten = orig_len <= rxm->early_eaten ?
+ orig_len : rxm->early_eaten;
+ rxm->early_eaten -= eaten;
+
+ return eaten;
+ }
+
+ if (unlikely(orig_offset)) {
+ /* Getting data with a non-zero offset when a message is
+ * in progress is not expected. If it does happen, we
+ * need to clone and pull since we can't deal with
+ * offsets in the skbs for a message expect in the head.
+ */
+ orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!orig_skb) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ if (!pskb_pull(orig_skb, orig_offset)) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ kfree_skb(orig_skb);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ cloned_orig = true;
+ orig_offset = 0;
+ }
+
+ if (!strp->rx_skb_nextp) {
+ /* We are going to append to the frags_list of head.
+ * Need to unshare the frag_list.
+ */
+ err = skb_unclone(head, GFP_ATOMIC);
+ if (err) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ desc->error = err;
+ return 0;
+ }
+
+ if (unlikely(skb_shinfo(head)->frag_list)) {
+ /* We can't append to an sk_buff that already
+ * has a frag_list. We create a new head, point
+ * the frag_list of that to the old head, and
+ * then are able to use the old head->next for
+ * appending to the message.
+ */
+ if (WARN_ON(head->next)) {
+ desc->error = -EINVAL;
+ return 0;
+ }
+
+ skb = alloc_skb(0, GFP_ATOMIC);
+ if (!skb) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ skb->len = head->len;
+ skb->data_len = head->len;
+ skb->truesize = head->truesize;
+ *_strp_rx_msg(skb) = *_strp_rx_msg(head);
+ strp->rx_skb_nextp = &head->next;
+ skb_shinfo(skb)->frag_list = head;
+ strp->rx_skb_head = skb;
+ head = skb;
+ } else {
+ strp->rx_skb_nextp =
+ &skb_shinfo(head)->frag_list;
+ }
+ }
+ }
+
+ while (eaten < orig_len) {
+ /* Always clone since we will consume something */
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ break;
+ }
+
+ cand_len = orig_len - eaten;
+
+ head = strp->rx_skb_head;
+ if (!head) {
+ head = skb;
+ strp->rx_skb_head = head;
+ /* Will set rx_skb_nextp on next packet if needed */
+ strp->rx_skb_nextp = NULL;
+ rxm = _strp_rx_msg(head);
+ memset(rxm, 0, sizeof(*rxm));
+ rxm->strp.offset = orig_offset + eaten;
+ } else {
+ /* Unclone since we may be appending to an skb that we
+ * already share a frag_list with.
+ */
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err) {
+ STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ desc->error = err;
+ break;
+ }
+
+ rxm = _strp_rx_msg(head);
+ *strp->rx_skb_nextp = skb;
+ strp->rx_skb_nextp = &skb->next;
+ head->data_len += skb->len;
+ head->len += skb->len;
+ head->truesize += skb->truesize;
+ }
+
+ if (!rxm->strp.full_len) {
+ ssize_t len;
+
+ len = (*strp->cb.parse_msg)(strp, head);
+
+ if (!len) {
+ /* Need more header to determine length */
+ if (!rxm->accum_len) {
+ /* Start RX timer for new message */
+ strp_start_rx_timer(strp);
+ }
+ rxm->accum_len += cand_len;
+ eaten += cand_len;
+ STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+ WARN_ON(eaten != orig_len);
+ break;
+ } else if (len < 0) {
+ if (len == -ESTRPIPE && rxm->accum_len) {
+ len = -ENODATA;
+ strp->rx_unrecov_intr = 1;
+ } else {
+ strp->rx_interrupted = 1;
+ }
+ strp_parser_err(strp, err, desc);
+ break;
+ } else if (len > strp->sk->sk_rcvbuf) {
+ /* Message length exceeds maximum allowed */
+ STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+ strp_parser_err(strp, -EMSGSIZE, desc);
+ break;
+ } else if (len <= (ssize_t)head->len -
+ skb->len - rxm->strp.offset) {
+ /* Length must be into new skb (and also
+ * greater than zero)
+ */
+ STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+ strp_parser_err(strp, -EPROTO, desc);
+ break;
+ }
+
+ rxm->strp.full_len = len;
+ }
+
+ extra = (ssize_t)(rxm->accum_len + cand_len) -
+ rxm->strp.full_len;
+
+ if (extra < 0) {
+ /* Message not complete yet. */
+ if (rxm->strp.full_len - rxm->accum_len >
+ strp_peek_len(strp)) {
+ /* Don't have the whole messages in the socket
+ * buffer. Set strp->rx_need_bytes to wait for
+ * the rest of the message. Also, set "early
+ * eaten" since we've already buffered the skb
+ * but don't consume yet per strp_read_sock.
+ */
+
+ if (!rxm->accum_len) {
+ /* Start RX timer for new message */
+ strp_start_rx_timer(strp);
+ }
+
+ strp->rx_need_bytes = rxm->strp.full_len -
+ rxm->accum_len;
+ rxm->accum_len += cand_len;
+ rxm->early_eaten = cand_len;
+ STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+ desc->count = 0; /* Stop reading socket */
+ break;
+ }
+ rxm->accum_len += cand_len;
+ eaten += cand_len;
+ WARN_ON(eaten != orig_len);
+ break;
+ }
+
+ /* Positive extra indicates ore bytes than needed for the
+ * message
+ */
+
+ WARN_ON(extra > cand_len);
+
+ eaten += (cand_len - extra);
+
+ /* Hurray, we have a new message! */
+ del_timer(&strp->rx_msg_timer);
+ strp->rx_skb_head = NULL;
+ STRP_STATS_INCR(strp->stats.rx_msgs);
+
+ /* Give skb to upper layer */
+ strp->cb.rcv_msg(strp, head);
+
+ if (unlikely(strp->rx_paused)) {
+ /* Upper layer paused strp */
+ break;
+ }
+ }
+
+ if (cloned_orig)
+ kfree_skb(orig_skb);
+
+ STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+
+ return eaten;
+}
+
+static int default_read_sock_done(struct strparser *strp, int err)
+{
+ return err;
+}
+
+/* Called with lock held on lower socket */
+static int strp_read_sock(struct strparser *strp)
+{
+ struct socket *sock = strp->sk->sk_socket;
+ read_descriptor_t desc;
+
+ desc.arg.data = strp;
+ desc.error = 0;
+ desc.count = 1; /* give more than one skb per call */
+
+ /* sk should be locked here, so okay to do read_sock */
+ sock->ops->read_sock(strp->sk, &desc, strp_recv);
+
+ desc.error = strp->cb.read_sock_done(strp, desc.error);
+
+ return desc.error;
+}
+
+/* Lower sock lock held */
+void strp_data_ready(struct strparser *strp)
+{
+ if (unlikely(strp->rx_stopped))
+ return;
+
+ /* This check is needed to synchronize with do_strp_rx_work.
+ * do_strp_rx_work acquires a process lock (lock_sock) whereas
+ * the lock held here is bh_lock_sock. The two locks can be
+ * held by different threads at the same time, but bh_lock_sock
+ * allows a thread in BH context to safely check if the process
+ * lock is held. In this case, if the lock is held, queue work.
+ */
+ if (sock_owned_by_user(strp->sk)) {
+ queue_work(strp_wq, &strp->rx_work);
+ return;
+ }
+
+ if (strp->rx_paused)
+ return;
+
+ if (strp->rx_need_bytes) {
+ if (strp_peek_len(strp) >= strp->rx_need_bytes)
+ strp->rx_need_bytes = 0;
+ else
+ return;
+ }
+
+ if (strp_read_sock(strp) == -ENOMEM)
+ queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_data_ready);
+
+static void do_strp_rx_work(struct strparser *strp)
+{
+ read_descriptor_t rd_desc;
+ struct sock *csk = strp->sk;
+
+ /* We need the read lock to synchronize with strp_data_ready. We
+ * need the socket lock for calling strp_read_sock.
+ */
+ lock_sock(csk);
+
+ if (unlikely(strp->rx_stopped))
+ goto out;
+
+ if (strp->rx_paused)
+ goto out;
+
+ rd_desc.arg.data = strp;
+
+ if (strp_read_sock(strp) == -ENOMEM)
+ queue_work(strp_wq, &strp->rx_work);
+
+out:
+ release_sock(csk);
+}
+
+static void strp_rx_work(struct work_struct *w)
+{
+ do_strp_rx_work(container_of(w, struct strparser, rx_work));
+}
+
+static void strp_rx_msg_timeout(unsigned long arg)
+{
+ struct strparser *strp = (struct strparser *)arg;
+
+ /* Message assembly timed out */
+ STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
+ lock_sock(strp->sk);
+ strp->cb.abort_parser(strp, ETIMEDOUT);
+ release_sock(strp->sk);
+}
+
+int strp_init(struct strparser *strp, struct sock *csk,
+ struct strp_callbacks *cb)
+{
+ struct socket *sock = csk->sk_socket;
+
+ if (!cb || !cb->rcv_msg || !cb->parse_msg)
+ return -EINVAL;
+
+ if (!sock->ops->read_sock || !sock->ops->peek_len)
+ return -EAFNOSUPPORT;
+
+ memset(strp, 0, sizeof(*strp));
+
+ strp->sk = csk;
+
+ setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
+ (unsigned long)strp);
+
+ INIT_WORK(&strp->rx_work, strp_rx_work);
+
+ strp->cb.rcv_msg = cb->rcv_msg;
+ strp->cb.parse_msg = cb->parse_msg;
+ strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
+ strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(strp_init);
+
+void strp_unpause(struct strparser *strp)
+{
+ strp->rx_paused = 0;
+
+ /* Sync setting rx_paused with RX work */
+ smp_mb();
+
+ queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_unpause);
+
+/* strp must already be stopped so that strp_recv will no longer be called.
+ * Note that strp_done is not called with the lower socket held.
+ */
+void strp_done(struct strparser *strp)
+{
+ WARN_ON(!strp->rx_stopped);
+
+ del_timer_sync(&strp->rx_msg_timer);
+ cancel_work_sync(&strp->rx_work);
+
+ if (strp->rx_skb_head) {
+ kfree_skb(strp->rx_skb_head);
+ strp->rx_skb_head = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(strp_done);
+
+void strp_stop(struct strparser *strp)
+{
+ strp->rx_stopped = 1;
+}
+EXPORT_SYMBOL_GPL(strp_stop);
+
+void strp_check_rcv(struct strparser *strp)
+{
+ queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_check_rcv);
+
+static int __init strp_mod_init(void)
+{
+ strp_wq = create_singlethread_workqueue("kstrp");
+
+ return 0;
+}
+
+static void __exit strp_mod_exit(void)
+{
+}
+module_init(strp_mod_init);
+module_exit(strp_mod_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5fc9dd24aa9..1031a0327fff 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1292,12 +1292,10 @@ bool switchdev_port_same_parent_id(struct net_device *a,
struct switchdev_attr a_attr = {
.orig_dev = a,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- .flags = SWITCHDEV_F_NO_RECURSE,
};
struct switchdev_attr b_attr = {
.orig_dev = b,
.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
- .flags = SWITCHDEV_F_NO_RECURSE,
};
if (switchdev_port_attr_get(a, &a_attr) ||
@@ -1306,89 +1304,4 @@ bool switchdev_port_same_parent_id(struct net_device *a,
return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}
-
-static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
- struct net_device *group_dev)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
- if (lower_dev == dev)
- continue;
- if (switchdev_port_same_parent_id(dev, lower_dev))
- return lower_dev->offload_fwd_mark;
- return switchdev_port_fwd_mark_get(dev, lower_dev);
- }
-
- return dev->ifindex;
-}
EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
-static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
- u32 old_mark, u32 *reset_mark)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
- if (lower_dev->offload_fwd_mark == old_mark) {
- if (!*reset_mark)
- *reset_mark = lower_dev->ifindex;
- lower_dev->offload_fwd_mark = *reset_mark;
- }
- switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
- }
-}
-
-/**
- * switchdev_port_fwd_mark_set - Set port offload forwarding mark
- *
- * @dev: port device
- * @group_dev: containing device
- * @joining: true if dev is joining group; false if leaving group
- *
- * An ungrouped port's offload mark is just its ifindex. A grouped
- * port's (member of a bridge, for example) offload mark is the ifindex
- * of one of the ports in the group with the same parent (switch) ID.
- * Ports on the same device in the same group will have the same mark.
- *
- * Example:
- *
- * br0 ifindex=9
- * sw1p1 ifindex=2 mark=2
- * sw1p2 ifindex=3 mark=2
- * sw2p1 ifindex=4 mark=5
- * sw2p2 ifindex=5 mark=5
- *
- * If sw2p2 leaves the bridge, we'll have:
- *
- * br0 ifindex=9
- * sw1p1 ifindex=2 mark=2
- * sw1p2 ifindex=3 mark=2
- * sw2p1 ifindex=4 mark=4
- * sw2p2 ifindex=5 mark=5
- */
-void switchdev_port_fwd_mark_set(struct net_device *dev,
- struct net_device *group_dev,
- bool joining)
-{
- u32 mark = dev->ifindex;
- u32 reset_mark = 0;
-
- if (group_dev) {
- ASSERT_RTNL();
- if (joining)
- mark = switchdev_port_fwd_mark_get(dev, group_dev);
- else if (dev->offload_fwd_mark == mark)
- /* Ohoh, this port was the mark reference port,
- * but it's leaving the group, so reset the
- * mark for the remaining ports in the group.
- */
- switchdev_port_fwd_mark_reset(group_dev, mark,
- &reset_mark);
- }
-
- dev->offload_fwd_mark = mark;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 46a71c701e7c..5bc1a3d57401 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head,
struct ctl_table *table)
{
struct net *net = container_of(head->set, struct net, sysctls);
- kuid_t root_uid = make_kuid(net->user_ns, 0);
- kgid_t root_gid = make_kgid(net->user_ns, 0);
/* Allow network administrator to have same access as root. */
- if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
- uid_eq(root_uid, current_euid())) {
+ if (ns_capable(net->user_ns, CAP_NET_ADMIN)) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
- /* Allow netns root group to have the same access as the root group */
- if (in_egroup_p(root_gid)) {
- int mode = (table->mode >> 3) & 7;
- return (mode << 3) | mode;
- }
+
return table->mode;
}
+static void net_ctl_set_ownership(struct ctl_table_header *head,
+ struct ctl_table *table,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct net *net = container_of(head->set, struct net, sysctls);
+ kuid_t ns_root_uid;
+ kgid_t ns_root_gid;
+
+ ns_root_uid = make_kuid(net->user_ns, 0);
+ if (uid_valid(ns_root_uid))
+ *uid = ns_root_uid;
+
+ ns_root_gid = make_kgid(net->user_ns, 0);
+ if (gid_valid(ns_root_gid))
+ *gid = ns_root_gid;
+}
+
static struct ctl_table_root net_sysctl_root = {
.lookup = net_ctl_header_lookup,
.permissions = net_ctl_permissions,
+ .set_ownership = net_ctl_set_ownership,
};
static int __net_init sysctl_net_init(struct net *net)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 65b1bbf133bd..975dbeb60ab0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -42,6 +42,7 @@
#include "monitor.h"
#include "bcast.h"
#include "netlink.h"
+#include "udp_media.h"
#define MAX_ADDR_STR 60
@@ -56,6 +57,13 @@ static struct tipc_media * const media_info_array[] = {
NULL
};
+static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ return rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+}
+
static void bearer_disable(struct net *net, struct tipc_bearer *b);
/**
@@ -323,6 +331,7 @@ restart:
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = priority;
+ test_and_set_bit_lock(0, &b->up);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
@@ -360,15 +369,24 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
*/
void tipc_bearer_reset_all(struct net *net)
{
- struct tipc_net *tn = tipc_net(net);
struct tipc_bearer *b;
int i;
for (i = 0; i < MAX_BEARERS; i++) {
- b = rcu_dereference_rtnl(tn->bearer_list[i]);
+ b = bearer_get(net, i);
+ if (b)
+ clear_bit_unlock(0, &b->up);
+ }
+ for (i = 0; i < MAX_BEARERS; i++) {
+ b = bearer_get(net, i);
if (b)
tipc_reset_bearer(net, b);
}
+ for (i = 0; i < MAX_BEARERS; i++) {
+ b = bearer_get(net, i);
+ if (b)
+ test_and_set_bit_lock(0, &b->up);
+ }
}
/**
@@ -382,8 +400,9 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
int bearer_id = b->identity;
pr_info("Disabling bearer <%s>\n", b->name);
- b->media->disable_media(b);
+ clear_bit_unlock(0, &b->up);
tipc_node_delete_links(net, bearer_id);
+ b->media->disable_media(b);
RCU_INIT_POINTER(b->media_ptr, NULL);
if (b->link_req)
tipc_disc_delete(b->link_req);
@@ -440,22 +459,16 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
{
struct net_device *dev;
int delta;
- void *tipc_ptr;
dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
if (!dev)
return 0;
- /* Send RESET message even if bearer is detached from device */
- tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
- if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb))))
- goto drop;
-
- delta = dev->hard_header_len - skb_headroom(skb);
- if ((delta > 0) &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
- goto drop;
-
+ delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+ if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ return 0;
+ }
skb_reset_network_header(skb);
skb->dev = dev;
skb->protocol = htons(ETH_P_TIPC);
@@ -463,9 +476,6 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
dev->dev_addr, skb->len);
dev_queue_xmit(skb);
return 0;
-drop:
- kfree_skb(skb);
- return 0;
}
int tipc_bearer_mtu(struct net *net, u32 bearer_id)
@@ -487,12 +497,12 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
struct sk_buff *skb,
struct tipc_media_addr *dest)
{
- struct tipc_net *tn = tipc_net(net);
+ struct tipc_msg *hdr = buf_msg(skb);
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (likely(b))
+ b = bearer_get(net, bearer_id);
+ if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr))))
b->media->send_msg(net, skb, b, dest);
else
kfree_skb(skb);
@@ -505,7 +515,6 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
struct tipc_media_addr *dst)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
@@ -513,12 +522,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
return;
rcu_read_lock();
- b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ b = bearer_get(net, bearer_id);
if (unlikely(!b))
__skb_queue_purge(xmitq);
skb_queue_walk_safe(xmitq, skb, tmp) {
__skb_dequeue(xmitq);
- b->media->send_msg(net, skb, b, dst);
+ if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb))))
+ b->media->send_msg(net, skb, b, dst);
+ else
+ kfree_skb(skb);
}
rcu_read_unlock();
}
@@ -535,8 +547,8 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct tipc_msg *hdr;
rcu_read_lock();
- b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (unlikely(!b))
+ b = bearer_get(net, bearer_id);
+ if (unlikely(!b || !test_bit(0, &b->up)))
__skb_queue_purge(xmitq);
skb_queue_walk_safe(xmitq, skb, tmp) {
hdr = buf_msg(skb);
@@ -566,7 +578,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
rcu_read_lock();
b = rcu_dereference_rtnl(dev->tipc_ptr);
- if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) {
+ if (likely(b && test_bit(0, &b->up) &&
+ (skb->pkt_type <= PACKET_BROADCAST))) {
skb->next = NULL;
tipc_rcv(dev_net(dev), skb, b);
rcu_read_unlock();
@@ -591,18 +604,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct tipc_net *tn = tipc_net(net);
struct tipc_bearer *b;
- int i;
b = rtnl_dereference(dev->tipc_ptr);
- if (!b) {
- for (i = 0; i < MAX_BEARERS; b = NULL, i++) {
- b = rtnl_dereference(tn->bearer_list[i]);
- if (b && (b->media_ptr == dev))
- break;
- }
- }
if (!b)
return NOTIFY_DONE;
@@ -613,11 +617,10 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
if (netif_carrier_ok(dev))
break;
case NETDEV_UP:
- rcu_assign_pointer(dev->tipc_ptr, b);
+ test_and_set_bit_lock(0, &b->up);
break;
case NETDEV_GOING_DOWN:
- RCU_INIT_POINTER(dev->tipc_ptr, NULL);
- synchronize_net();
+ clear_bit_unlock(0, &b->up);
tipc_reset_bearer(net, b);
break;
case NETDEV_CHANGEMTU:
@@ -709,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
goto prop_msg_full;
nla_nest_end(msg->skb, prop);
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) {
+ if (tipc_udp_nl_add_bearer_data(msg, bearer))
+ goto attr_msg_full;
+ }
+#endif
+
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
@@ -895,6 +906,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
return 0;
}
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *name;
+ struct tipc_bearer *b;
+ struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+
+ if (!info->attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
+ info->attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+ name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+ rtnl_lock();
+ b = tipc_bearer_find(net, name);
+ if (!b) {
+ rtnl_unlock();
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ err = tipc_udp_nl_bearer_add(b,
+ attrs[TIPC_NLA_BEARER_UDP_OPTS]);
+ if (err) {
+ rtnl_unlock();
+ return err;
+ }
+ }
+#endif
+ rtnl_unlock();
+
+ return 0;
+}
+
int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
int err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 43757f1f9cb3..78892e2f53e3 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -150,6 +150,7 @@ struct tipc_bearer {
u32 identity;
struct tipc_link_req *link_req;
char net_plane;
+ unsigned long up;
};
struct tipc_bearer_names {
@@ -180,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 877d94f34814..2c6e1b9e024b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -807,7 +807,7 @@ void link_prepare_wakeup(struct tipc_link *l)
skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
imp = TIPC_SKB_CB(skb)->chain_imp;
- lim = l->window + l->backlog[imp].limit;
+ lim = l->backlog[imp].limit;
pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
if ((pnd[imp] + l->backlog[imp].len) >= lim)
break;
@@ -873,9 +873,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff *skb, *_skb, *bskb;
/* Match msg importance against this and all higher backlog limits: */
- for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
- if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
- return link_schedule_user(l, list);
+ if (!skb_queue_empty(backlogq)) {
+ for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+ if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
+ return link_schedule_user(l, list);
+ }
}
if (unlikely(msg_size(hdr) > mtu)) {
skb_queue_purge(list);
@@ -1692,10 +1694,10 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
l->window = win;
- l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2;
- l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win;
- l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3;
- l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2;
+ l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win);
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2);
+ l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3);
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 77a7a118911d..c7c254902873 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -39,6 +39,8 @@
#include <net/genetlink.h>
+extern const struct nla_policy tipc_nl_net_policy[];
+
int tipc_net_start(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index a84daec0afe9..3200059d14b2 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -41,6 +41,7 @@
#include "link.h"
#include "node.h"
#include "net.h"
+#include "udp_media.h"
#include <net/genetlink.h>
static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
@@ -161,6 +162,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
.policy = tipc_nl_policy,
},
{
+ .cmd = TIPC_NL_BEARER_ADD,
+ .doit = tipc_nl_bearer_add,
+ .policy = tipc_nl_policy,
+ },
+ {
.cmd = TIPC_NL_BEARER_SET,
.doit = tipc_nl_bearer_set,
.policy = tipc_nl_policy,
@@ -238,6 +244,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
.dumpit = tipc_nl_node_dump_monitor_peer,
.policy = tipc_nl_policy,
},
+ {
+ .cmd = TIPC_NL_PEER_REMOVE,
+ .doit = tipc_nl_peer_rm,
+ .policy = tipc_nl_policy,
+ },
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ {
+ .cmd = TIPC_NL_UDP_GET_REMOTEIP,
+ .dumpit = tipc_udp_nl_dump_remoteip,
+ .policy = tipc_nl_policy,
+ },
+#endif
};
int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 21974191e425..7e8b75fd1a02 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1553,6 +1553,69 @@ discard:
kfree_skb(skb);
}
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+ struct tipc_node *peer;
+ u32 addr;
+ int err;
+ int i;
+
+ /* We identify the peer by its net */
+ if (!info->attrs[TIPC_NLA_NET])
+ return -EINVAL;
+
+ err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
+ info->attrs[TIPC_NLA_NET],
+ tipc_nl_net_policy);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_NET_ADDR])
+ return -EINVAL;
+
+ addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+
+ if (in_own_node(net, addr))
+ return -ENOTSUPP;
+
+ spin_lock_bh(&tn->node_list_lock);
+ peer = tipc_node_find(net, addr);
+ if (!peer) {
+ spin_unlock_bh(&tn->node_list_lock);
+ return -ENXIO;
+ }
+
+ tipc_node_write_lock(peer);
+ if (peer->state != SELF_DOWN_PEER_DOWN &&
+ peer->state != SELF_DOWN_PEER_LEAVING) {
+ tipc_node_write_unlock(peer);
+ err = -EBUSY;
+ goto err_out;
+ }
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ struct tipc_link_entry *le = &peer->links[i];
+
+ if (le->link) {
+ kfree(le->link);
+ le->link = NULL;
+ peer->link_cnt--;
+ }
+ }
+ tipc_node_write_unlock(peer);
+ tipc_node_delete(peer);
+
+ err = 0;
+err_out:
+ tipc_node_put(peer);
+ spin_unlock_bh(&tn->node_list_lock);
+
+ return err;
+}
+
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int err;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index d69fdfcc0ec9..4578b34c7dca 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -77,6 +77,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ae7e14cae085..dd274687a53d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -49,6 +49,7 @@
#include "core.h"
#include "bearer.h"
#include "netlink.h"
+#include "msg.h"
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
@@ -70,6 +71,13 @@ struct udp_media_addr {
};
};
+/* struct udp_replicast - container for UDP remote addresses */
+struct udp_replicast {
+ struct udp_media_addr addr;
+ struct rcu_head rcu;
+ struct list_head list;
+};
+
/**
* struct udp_bearer - ip/udp bearer data structure
* @bearer: associated generic tipc bearer
@@ -82,8 +90,20 @@ struct udp_bearer {
struct socket *ubsock;
u32 ifindex;
struct work_struct work;
+ struct udp_replicast rcast;
};
+static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
+{
+ if (ntohs(addr->proto) == ETH_P_IP)
+ return ipv4_is_multicast(addr->ipv4.s_addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ return ipv6_addr_is_multicast(&addr->ipv6);
+#endif
+ return 0;
+}
+
/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
struct udp_media_addr *ua)
@@ -91,15 +111,9 @@ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
memset(addr, 0, sizeof(struct tipc_media_addr));
addr->media_id = TIPC_MEDIA_TYPE_UDP;
memcpy(addr->value, ua, sizeof(struct udp_media_addr));
- if (ntohs(ua->proto) == ETH_P_IP) {
- if (ipv4_is_multicast(ua->ipv4.s_addr))
- addr->broadcast = 1;
- } else if (ntohs(ua->proto) == ETH_P_IPV6) {
- if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
- addr->broadcast = 1;
- } else {
- pr_err("Invalid UDP media address\n");
- }
+
+ if (tipc_udp_is_mcast_addr(ua))
+ addr->broadcast = 1;
}
/* tipc_udp_addr2str - convert ip/udp address to string */
@@ -140,28 +154,13 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
}
/* tipc_send_msg - enqueue a send request */
-static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
- struct tipc_bearer *b,
- struct tipc_media_addr *dest)
+static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
+ struct udp_bearer *ub, struct udp_media_addr *src,
+ struct udp_media_addr *dst)
{
int ttl, err = 0;
- struct udp_bearer *ub;
- struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
- struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
struct rtable *rt;
- if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
- err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
- if (err)
- goto tx_error;
- }
-
- skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
- ub = rcu_dereference_rtnl(b->media_ptr);
- if (!ub) {
- err = -ENODEV;
- goto tx_error;
- }
if (dst->proto == htons(ETH_P_IP)) {
struct flowi4 fl = {
.daddr = dst->ipv4.s_addr,
@@ -207,29 +206,178 @@ tx_error:
return err;
}
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *addr)
+{
+ struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+ struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value;
+ struct udp_replicast *rcast;
+ struct udp_bearer *ub;
+ int err = 0;
+
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (err)
+ goto out;
+ }
+
+ skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!addr->broadcast || list_empty(&ub->rcast.list))
+ return tipc_udp_xmit(net, skb, ub, src, dst);
+
+ /* Replicast, send an skb to each configured IP address */
+ list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
+ struct sk_buff *_skb;
+
+ _skb = pskb_copy(skb, GFP_ATOMIC);
+ if (!_skb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
+ if (err) {
+ kfree_skb(_skb);
+ goto out;
+ }
+ }
+ err = 0;
+out:
+ kfree_skb(skb);
+ return err;
+}
+
+static bool tipc_udp_is_known_peer(struct tipc_bearer *b,
+ struct udp_media_addr *addr)
+{
+ struct udp_replicast *rcast, *tmp;
+ struct udp_bearer *ub;
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub) {
+ pr_err_ratelimited("UDP bearer instance not found\n");
+ return false;
+ }
+
+ list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+ if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr)))
+ return true;
+ }
+
+ return false;
+}
+
+static int tipc_udp_rcast_add(struct tipc_bearer *b,
+ struct udp_media_addr *addr)
+{
+ struct udp_replicast *rcast;
+ struct udp_bearer *ub;
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub)
+ return -ENODEV;
+
+ rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
+ if (!rcast)
+ return -ENOMEM;
+
+ memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
+
+ if (ntohs(addr->proto) == ETH_P_IP)
+ pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (ntohs(addr->proto) == ETH_P_IPV6)
+ pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
+#endif
+
+ list_add_rcu(&rcast->list, &ub->rcast.list);
+ return 0;
+}
+
+static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb)
+{
+ struct udp_media_addr src = {0};
+ struct udp_media_addr *dst;
+
+ dst = (struct udp_media_addr *)&b->bcast_addr.value;
+ if (tipc_udp_is_mcast_addr(dst))
+ return 0;
+
+ src.port = udp_hdr(skb)->source;
+
+ if (ip_hdr(skb)->version == 4) {
+ struct iphdr *iphdr = ip_hdr(skb);
+
+ src.proto = htons(ETH_P_IP);
+ src.ipv4.s_addr = iphdr->saddr;
+ if (ipv4_is_multicast(iphdr->daddr))
+ return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (ip_hdr(skb)->version == 6) {
+ struct ipv6hdr *iphdr = ipv6_hdr(skb);
+
+ src.proto = htons(ETH_P_IPV6);
+ src.ipv6 = iphdr->saddr;
+ if (ipv6_addr_is_multicast(&iphdr->daddr))
+ return 0;
+#endif
+ } else {
+ return 0;
+ }
+
+ if (likely(tipc_udp_is_known_peer(b, &src)))
+ return 0;
+
+ return tipc_udp_rcast_add(b, &src);
+}
+
/* tipc_udp_recv - read data from bearer socket */
static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct udp_bearer *ub;
struct tipc_bearer *b;
+ struct tipc_msg *hdr;
+ int err;
ub = rcu_dereference_sk_user_data(sk);
if (!ub) {
pr_err_ratelimited("Failed to get UDP bearer reference");
- kfree_skb(skb);
- return 0;
+ goto out;
}
-
skb_pull(skb, sizeof(struct udphdr));
+ hdr = buf_msg(skb);
+
rcu_read_lock();
b = rcu_dereference_rtnl(ub->bearer);
+ if (!b)
+ goto rcu_out;
- if (b) {
+ if (b && test_bit(0, &b->up)) {
tipc_rcv(sock_net(sk), skb, b);
rcu_read_unlock();
return 0;
}
+
+ if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
+ err = tipc_udp_rcast_disc(b, skb);
+ if (err)
+ goto rcu_out;
+ }
+
+ tipc_rcv(sock_net(sk), skb, b);
rcu_read_unlock();
+ return 0;
+
+rcu_out:
+ rcu_read_unlock();
+out:
kfree_skb(skb);
return 0;
}
@@ -241,15 +389,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
struct sock *sk = ub->ubsock->sk;
if (ntohs(remote->proto) == ETH_P_IP) {
- if (!ipv4_is_multicast(remote->ipv4.s_addr))
- return 0;
mreqn.imr_multiaddr = remote->ipv4;
mreqn.imr_ifindex = ub->ifindex;
err = ip_mc_join_group(sk, &mreqn);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- if (!ipv6_addr_is_multicast(&remote->ipv6))
- return 0;
err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
&remote->ipv6);
#endif
@@ -257,75 +401,234 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
return err;
}
-/**
- * parse_options - build local/remote addresses from configuration
- * @attrs: netlink config data
- * @ub: UDP bearer instance
- * @local: local bearer IP address/port
- * @remote: peer or multicast IP/port
- */
-static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
- struct udp_media_addr *local,
- struct udp_media_addr *remote)
+static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
+ struct udp_media_addr *addr, int nla_t)
{
- struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
- struct sockaddr_storage sa_local, sa_remote;
+ if (ntohs(addr->proto) == ETH_P_IP) {
+ struct sockaddr_in ip4;
- if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
- goto err;
- if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
- attrs[TIPC_NLA_BEARER_UDP_OPTS],
- tipc_nl_udp_policy))
- goto err;
- if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
- nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
- sizeof(sa_local));
- nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
- sizeof(sa_remote));
+ ip4.sin_family = AF_INET;
+ ip4.sin_port = addr->port;
+ ip4.sin_addr.s_addr = addr->ipv4.s_addr;
+ if (nla_put(skb, nla_t, sizeof(ip4), &ip4))
+ return -EMSGSIZE;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (ntohs(addr->proto) == ETH_P_IPV6) {
+ struct sockaddr_in6 ip6;
+
+ ip6.sin6_family = AF_INET6;
+ ip6.sin6_port = addr->port;
+ memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
+ if (nla_put(skb, nla_t, sizeof(ip6), &ip6))
+ return -EMSGSIZE;
+#endif
+ }
+
+ return 0;
+}
+
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ u32 bid = cb->args[0];
+ u32 skip_cnt = cb->args[1];
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ struct udp_replicast *rcast, *tmp;
+ struct tipc_bearer *b;
+ struct udp_bearer *ub;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!bid && !skip_cnt) {
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr **attrs;
+ char *bname;
+
+ err = tipc_nlmsg_parse(cb->nlh, &attrs);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
+ attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy);
+ if (err)
+ return err;
+
+ if (!battrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+
+ bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]);
+
+ rtnl_lock();
+ b = tipc_bearer_find(net, bname);
+ if (!b) {
+ rtnl_unlock();
+ return -EINVAL;
+ }
+ bid = b->identity;
} else {
-err:
- pr_err("Invalid UDP bearer configuration");
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+ rtnl_lock();
+ b = rtnl_dereference(tn->bearer_list[bid]);
+ if (!b) {
+ rtnl_unlock();
+ return -EINVAL;
+ }
+ }
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub) {
+ rtnl_unlock();
return -EINVAL;
}
- if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
- struct sockaddr_in *ip4;
-
- ip4 = (struct sockaddr_in *)&sa_local;
- local->proto = htons(ETH_P_IP);
- local->port = ip4->sin_port;
- local->ipv4.s_addr = ip4->sin_addr.s_addr;
-
- ip4 = (struct sockaddr_in *)&sa_remote;
- remote->proto = htons(ETH_P_IP);
- remote->port = ip4->sin_port;
- remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+
+ i = 0;
+ list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+ if (i < skip_cnt)
+ goto count;
+
+ hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq,
+ &tipc_genl_family, NLM_F_MULTI,
+ TIPC_NL_BEARER_GET);
+ if (!hdr)
+ goto done;
+
+ err = __tipc_nl_add_udp_addr(skb, &rcast->addr,
+ TIPC_NLA_UDP_REMOTE);
+ if (err) {
+ genlmsg_cancel(skb, hdr);
+ goto done;
+ }
+ genlmsg_end(skb, hdr);
+count:
+ i++;
+ }
+done:
+ rtnl_unlock();
+ cb->args[0] = bid;
+ cb->args[1] = i;
+
+ return skb->len;
+}
+
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b)
+{
+ struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+ struct udp_media_addr *dst;
+ struct udp_bearer *ub;
+ struct nlattr *nest;
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub)
+ return -ENODEV;
+
+ nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+ if (!nest)
+ goto msg_full;
+
+ if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL))
+ goto msg_full;
+
+ dst = (struct udp_media_addr *)&b->bcast_addr.value;
+ if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE))
+ goto msg_full;
+
+ if (!list_empty(&ub->rcast.list)) {
+ if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP))
+ goto msg_full;
+ }
+
+ nla_nest_end(msg->skb, nest);
+ return 0;
+msg_full:
+ nla_nest_cancel(msg->skb, nest);
+ return -EMSGSIZE;
+}
+
+/**
+ * tipc_parse_udp_addr - build udp media address from netlink data
+ * @nlattr: netlink attribute containing sockaddr storage aligned address
+ * @addr: tipc media address to fill with address, port and protocol type
+ * @scope_id: IPv6 scope id pointer, not NULL indicates it's required
+ */
+
+static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr,
+ u32 *scope_id)
+{
+ struct sockaddr_storage sa;
+
+ nla_memcpy(&sa, nla, sizeof(sa));
+ if (sa.ss_family == AF_INET) {
+ struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa;
+
+ addr->proto = htons(ETH_P_IP);
+ addr->port = ip4->sin_port;
+ addr->ipv4.s_addr = ip4->sin_addr.s_addr;
return 0;
#if IS_ENABLED(CONFIG_IPV6)
- } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
- int atype;
- struct sockaddr_in6 *ip6;
+ } else if (sa.ss_family == AF_INET6) {
+ struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa;
- ip6 = (struct sockaddr_in6 *)&sa_local;
- atype = ipv6_addr_type(&ip6->sin6_addr);
- if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
- return -EINVAL;
+ addr->proto = htons(ETH_P_IPV6);
+ addr->port = ip6->sin6_port;
+ memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
+
+ /* Scope ID is only interesting for local addresses */
+ if (scope_id) {
+ int atype;
- local->proto = htons(ETH_P_IPV6);
- local->port = ip6->sin6_port;
- memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
- ub->ifindex = ip6->sin6_scope_id;
+ atype = ipv6_addr_type(&ip6->sin6_addr);
+ if (__ipv6_addr_needs_scope_id(atype) &&
+ !ip6->sin6_scope_id) {
+ return -EINVAL;
+ }
+
+ *scope_id = ip6->sin6_scope_id ? : 0;
+ }
- ip6 = (struct sockaddr_in6 *)&sa_remote;
- remote->proto = htons(ETH_P_IPV6);
- remote->port = ip6->sin6_port;
- memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
return 0;
#endif
}
return -EADDRNOTAVAIL;
}
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
+{
+ int err;
+ struct udp_media_addr addr = {0};
+ struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+ struct udp_media_addr *dst;
+
+ if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+ return -EINVAL;
+
+ if (!opts[TIPC_NLA_UDP_REMOTE])
+ return -EINVAL;
+
+ err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
+ if (err)
+ return err;
+
+ dst = (struct udp_media_addr *)&b->bcast_addr.value;
+ if (tipc_udp_is_mcast_addr(dst)) {
+ pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
+ return -EINVAL;
+ }
+
+ if (tipc_udp_is_known_peer(b, &addr))
+ return 0;
+
+ return tipc_udp_rcast_add(b, &addr);
+}
+
/**
* tipc_udp_enable - callback to create a new udp bearer instance
* @net: network namespace
@@ -340,18 +643,37 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
{
int err = -EINVAL;
struct udp_bearer *ub;
- struct udp_media_addr *remote;
+ struct udp_media_addr remote = {0};
struct udp_media_addr local = {0};
struct udp_port_cfg udp_conf = {0};
struct udp_tunnel_sock_cfg tuncfg = {NULL};
+ struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
if (!ub)
return -ENOMEM;
- remote = (struct udp_media_addr *)&b->bcast_addr.value;
- memset(remote, 0, sizeof(struct udp_media_addr));
- err = parse_options(attrs, ub, &local, remote);
+ INIT_LIST_HEAD(&ub->rcast.list);
+
+ if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+ goto err;
+
+ if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+ attrs[TIPC_NLA_BEARER_UDP_OPTS],
+ tipc_nl_udp_policy))
+ goto err;
+
+ if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
+ pr_err("Invalid UDP bearer configuration");
+ return -EINVAL;
+ }
+
+ err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local,
+ &ub->ifindex);
+ if (err)
+ goto err;
+
+ err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
if (err)
goto err;
@@ -396,9 +718,18 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
tuncfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
- err = enable_mcast(ub, remote);
+ /**
+ * The bcast media address port is used for all peers and the ip
+ * is used if it's a multicast address.
+ */
+ memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
+ if (tipc_udp_is_mcast_addr(&remote))
+ err = enable_mcast(ub, &remote);
+ else
+ err = tipc_udp_rcast_add(b, &remote);
if (err)
goto err;
+
return 0;
err:
if (ub->ubsock)
@@ -411,6 +742,12 @@ err:
static void cleanup_bearer(struct work_struct *work)
{
struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+ struct udp_replicast *rcast, *tmp;
+
+ list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+ list_del_rcu(&rcast->list);
+ kfree_rcu(rcast, rcu);
+ }
if (ub->ubsock)
udp_tunnel_sock_release(ub->ubsock);
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
new file mode 100644
index 000000000000..281bbae87726
--- /dev/null
+++ b/net/tipc/udp_media.h
@@ -0,0 +1,46 @@
+/*
+ * net/tipc/udp_media.h: Include file for UDP bearer media
+ *
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+#ifndef _TIPC_UDP_MEDIA_H
+#define _TIPC_UDP_MEDIA_H
+
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif
+#endif
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7645e97362c0..2029b49a1df3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -906,6 +906,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
if (WARN_ON(wdev->netdev))
return;
+ nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+
list_del_rcu(&wdev->list);
rdev->devlist_generation++;
@@ -1079,6 +1081,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
dev->priv_flags |= IFF_DONT_BRIDGE;
+
+ nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
break;
case NETDEV_GOING_DOWN:
cfg80211_leave(rdev, wdev);
@@ -1157,6 +1161,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
* remove and clean it up.
*/
if (!list_empty(&wdev->list)) {
+ nl80211_notify_iface(rdev, wdev,
+ NL80211_CMD_DEL_INTERFACE);
sysfs_remove_link(&dev->dev.kobj, "phy80211");
list_del_rcu(&wdev->list);
rdev->devlist_generation++;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f02653a08993..499785778983 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2751,7 +2751,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct vif_params params;
struct wireless_dev *wdev;
- struct sk_buff *msg, *event;
+ struct sk_buff *msg;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
u32 flags;
@@ -2855,20 +2855,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return -ENOBUFS;
}
- event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (event) {
- if (nl80211_send_iface(event, 0, 0, 0,
- rdev, wdev, false) < 0) {
- nlmsg_free(event);
- goto out;
- }
-
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- event, 0, NL80211_MCGRP_CONFIG,
- GFP_KERNEL);
- }
+ /*
+ * For wdevs which have no associated netdev object (e.g. of type
+ * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+ * For all other types, the event will be generated from the
+ * netdev notifier
+ */
+ if (!wdev->netdev)
+ nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
-out:
return genlmsg_reply(msg, info);
}
@@ -2876,18 +2871,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
- struct sk_buff *msg;
- int status;
if (!rdev->ops->del_virtual_intf)
return -EOPNOTSUPP;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
- nlmsg_free(msg);
- msg = NULL;
- }
-
/*
* If we remove a wireless device without a netdev then clear
* user_ptr[1] so that nl80211_post_doit won't dereference it
@@ -2898,15 +2885,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
if (!wdev->netdev)
info->user_ptr[1] = NULL;
- status = rdev_del_virtual_intf(rdev, wdev);
- if (status >= 0 && msg)
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- msg, 0, NL80211_MCGRP_CONFIG,
- GFP_KERNEL);
- else
- nlmsg_free(msg);
-
- return status;
+ return rdev_del_virtual_intf(rdev, wdev);
}
static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@ -5374,6 +5353,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou
return 0;
}
+static int nl80211_check_power_mode(const struct nlattr *nla,
+ enum nl80211_mesh_power_mode min,
+ enum nl80211_mesh_power_mode max,
+ enum nl80211_mesh_power_mode *out)
+{
+ u32 val = nla_get_u32(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
static int nl80211_parse_mesh_config(struct genl_info *info,
struct mesh_config *cfg,
u32 *mask_out)
@@ -5518,7 +5509,7 @@ do { \
NL80211_MESH_POWER_ACTIVE,
NL80211_MESH_POWER_MAX,
mask, NL80211_MESHCONF_POWER_MODE,
- nl80211_check_u32);
+ nl80211_check_power_mode);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
0, 65535, mask,
NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
@@ -7773,12 +7764,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
ibss.beacon_interval = 100;
- if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+ if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
ibss.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
- return -EINVAL;
- }
+
+ err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+ if (err)
+ return err;
if (!rdev->ops->join_ibss)
return -EOPNOTSUPP;
@@ -9252,9 +9244,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
setup.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- if (setup.beacon_interval < 10 ||
- setup.beacon_interval > 10000)
- return -EINVAL;
+
+ err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+ if (err)
+ return err;
}
if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
@@ -11847,6 +11840,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
NL80211_MCGRP_CONFIG, GFP_KERNEL);
}
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ enum nl80211_commands cmd)
+{
+ struct sk_buff *msg;
+
+ WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+ cmd != NL80211_CMD_DEL_INTERFACE);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+ cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_CONFIG, GFP_KERNEL);
+}
+
static int nl80211_add_scan_req(struct sk_buff *msg,
struct cfg80211_registered_device *rdev)
{
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a63f402b10b7..7e3821d7fcc5 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -7,6 +7,9 @@ int nl80211_init(void);
void nl80211_exit(void);
void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
enum nl80211_commands cmd);
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ enum nl80211_commands cmd);
void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b7d1592bd5b8..0675f513e7b9 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1559,7 +1559,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev;
int res = 0;
- if (!beacon_int)
+ if (beacon_int < 10 || beacon_int > 10000)
return -EINVAL;
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {