From 8d50b53d66a8a6ae41bafbdcabe401467803f33a Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 30 Jul 2008 02:37:46 -0700
Subject: pkt_sched: Fix OOPS on ingress qdisc add.

Bug report from Steven Jan Springl. Issuing the following command
causes a kernel oops:

	tc qdisc add dev eth0 handle ffff: ingress

The problem mostly stems from all of the special case handling of
ingress qdiscs.

So, to fix this, do the grafting operation the same way we do for TX
qdiscs.  Which means that dev_activate() and dev_deactivate() now do
the "qdisc_sleeping <--> qdisc" transitions on dev->rx_queue too.

Future simplifications are possible now, mainly because it is
impossible for dev_queue->{qdisc,qdisc_sleeping} to be NULL.  There
are NULL checks all over to handle the ingress qdisc special case
that used to exist before this commit.

Signed-off-by: David S. Miller
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8d13a9b9f1df..63d6bcddbf46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2100,7 +2100,7 @@ static int ing_filter(struct sk_buff *skb)
 	rxq = &dev->rx_queue;
 
 	q = rxq->qdisc;
-	if (q) {
+	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
 		result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
@@ -2113,7 +2113,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (!skb->dev->rx_queue.qdisc)
+	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
--
cgit

From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 31 Jul 2008 16:58:50 -0700
Subject: netdev: Fix lockdep warnings in multiqueue configurations.

When support for multiple TX queues was added, the netif_tx_lock()
routines were converted to iterate over all TX queues and grab each
queue's spinlock.  This causes heartburn for lockdep, and it is not a
healthy thing to do with lots of TX queues anyway.

So modify this to use a top-level lock and a "frozen" state for the
individual TX queues.

Signed-off-by: David S. Miller
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 63d6bcddbf46..69320a56a084 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev)
 {
 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
 }
 
 /**
--
cgit

From 5fb662297b8a4bdadd60371c34b760efca948ebc Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sat, 2 Aug 2008 20:02:43 -0700
Subject: pkt_sched: Use qdisc_lock() on already sampled root qdisc.

Based upon a bug report by Jeff Kirsher.

Don't use qdisc_root_lock() in these cases, as the root qdisc could
have been changed and we'd thus lock the wrong object.

Tested by Emil S Tantilov, who confirms that this seems to fix the
problem.

Signed-off-by: David S. Miller
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 69320a56a084..da7acacf02b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1796,7 +1796,7 @@ gso:
 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		spinlock_t *root_lock = qdisc_root_lock(q);
+		spinlock_t *root_lock = qdisc_lock(q);
 
 		spin_lock(root_lock);
@@ -1995,7 +1995,7 @@ static void net_tx_action(struct softirq_action *h)
 			smp_mb__before_clear_bit();
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 
-			root_lock = qdisc_root_lock(q);
+			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
 				qdisc_run(q);
 				spin_unlock(root_lock);
--
cgit
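The last fix above is easier to follow next to the two helpers involved.
A rough sketch of their definitions from include/net/sch_generic.h of
this era follows (illustrative, not verbatim): qdisc_lock() returns the
lock embedded in the qdisc you already hold a pointer to, while
qdisc_root_lock() re-resolves the root qdisc at call time, which is
exactly what goes wrong once the root may have been replaced.

/* Illustrative sketch (not verbatim) of the helpers from
 * include/net/sch_generic.h around 2.6.27.
 */
static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
{
	/* Lock embedded in the qdisc we already sampled. */
	return &qdisc->q.lock;
}

static inline struct Qdisc *qdisc_root(struct Qdisc *qdisc)
{
	/* Re-reads the queue's current root qdisc pointer, which may
	 * no longer be the qdisc we sampled earlier.
	 */
	return qdisc->dev_queue->qdisc;
}

static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc)
{
	return qdisc_lock(qdisc_root(qdisc));
}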
From e5a4a72d4f88f4389e9340d383ca67031d1b8536 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek
Date: Sun, 3 Aug 2008 01:23:10 -0700
Subject: net: use software GSO for SG+CSUM capable netdevices

If a netdevice does not support hardware GSO, allowing the stack to
use GSO anyway and then splitting the GSO skb into MSS-sized pieces
as it is handed to the netdevice for transmitting is likely still a
win as far as throughput and/or CPU usage are concerned, since it
reduces the number of trips through the output path.

This patch enables the use of GSO on any netdevice that supports SG.
If a GSO skb is then sent to a netdevice that supports SG but does
not support hardware GSO, net/core/dev.c:dev_hard_start_xmit() will
take care of doing the necessary GSO segmentation in software.

Signed-off-by: Lennert Buytenhek
Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index da7acacf02b5..cbf80098980c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3988,6 +3988,10 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
+	/* Enable software GSO if SG is supported. */
+	if (dev->features & NETIF_F_SG)
+		dev->features |= NETIF_F_GSO;
+
 	netdev_initialize_kobject(dev);
 	ret = netdev_register_kobject(dev);
 	if (ret)
--
cgit

From 6e583ce5242f32e925dcb198f7123256d0798370 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Sun, 3 Aug 2008 21:29:57 -0700
Subject: net: eliminate refcounting in backlog queue

Avoid the overhead of atomic increment/decrement on each received
packet.  This helps the performance of non-NAPI devices (like
loopback).

Use a cleanup function to walk the queue on each CPU and clean out
any leftover packets.

Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 net/core/dev.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index cbf80098980c..fc6c9881eca8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1909,7 +1909,6 @@ int netif_rx(struct sk_buff *skb)
 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
-			dev_hold(skb->dev);
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
@@ -2270,6 +2269,20 @@ out:
 	return ret;
 }
 
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(void *arg)
+{
+	struct net_device *dev = arg;
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct sk_buff *skb, *tmp;
+
+	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &queue->input_pkt_queue);
+			kfree_skb(skb);
+		}
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
@@ -2279,7 +2292,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
-		struct net_device *dev;
 
 		local_irq_disable();
 		skb = __skb_dequeue(&queue->input_pkt_queue);
@@ -2288,14 +2300,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
-		local_irq_enable();
-
-		dev = skb->dev;
-
+		local_irq_enable();
 		netif_receive_skb(skb);
-
-		dev_put(dev);
 	} while (++work < quota && jiffies == start_time);
 
 	return work;
@@ -4169,6 +4176,8 @@ void netdev_run_todo(void)
 
 		dev->reg_state = NETREG_UNREGISTERED;
 
+		on_each_cpu(flush_backlog, dev, 1);
+
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
--
cgit
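For the software-GSO change two commits up, the interesting machinery
is already in dev_hard_start_xmit(): when a GSO skb reaches a device
without hardware GSO, it is segmented in software and the segments are
transmitted one at a time. A condensed, hedged sketch of that path
follows (simplified from net/core/dev.c of this era; error unwinding
and the DEV_GSO_CB bookkeeping are glossed over):

/* Condensed sketch of the dev_hard_start_xmit() GSO fallback of this
 * era; simplified, not the verbatim kernel code.
 */
static int hard_start_xmit_sketch(struct sk_buff *skb,
				  struct net_device *dev)
{
	if (netif_needs_gso(dev, skb)) {
		/* Software GSO: split skb into a list of MSS-sized
		 * segments chained via skb->next.
		 */
		if (unlikely(dev_gso_segment(skb)))
			goto out_kfree_skb;
		if (skb->next)
			goto gso;
	}
	return dev->hard_start_xmit(skb, dev);

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Put the segment back; caller will requeue. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
	} while (skb->next);

out_kfree_skb:
	kfree_skb(skb);	/* the now-empty GSO shell */
	return 0;
}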
From c27f339af90bb874a7a9c680b17abfd32d4a727b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski
Date: Mon, 4 Aug 2008 22:39:11 -0700
Subject: net_sched: Add qdisc __NET_XMIT_BYPASS flag

Patrick McHardy noticed that it would be nicer to handle
NET_XMIT_BYPASS as NET_XMIT_SUCCESS, using an internal qdisc flag
__NET_XMIT_BYPASS, and to remove the mapping from dev_queue_xmit().

David Miller spotted a serious bug in the first version of this
patch.

Signed-off-by: Jarek Poplawski
Signed-off-by: David S. Miller
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index fc6c9881eca8..01993ad74e76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1805,7 +1805,6 @@ gso:
 
 		spin_unlock(root_lock);
 
-		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 		goto out;
 	}
--
cgit

From cc9bd5cebc0825e0fabc0186ab85806a0891104f Mon Sep 17 00:00:00 2001
From: Joe Eykholt
Date: Wed, 2 Jul 2008 18:22:00 -0700
Subject: net/core: Uninline skb_bond().

Otherwise, subsequent changes would need multiple return values.

Signed-off-by: Joe Eykholt
Signed-off-by: Jay Vosburgh
Signed-off-by: Jeff Garzik
---
 net/core/dev.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 01993ad74e76..4a09833331f1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1939,22 +1939,6 @@ int netif_rx_ni(struct sk_buff *skb)
 
 EXPORT_SYMBOL(netif_rx_ni);
 
-static inline struct net_device *skb_bond(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-
-	if (dev->master) {
-		if (skb_bond_should_drop(skb)) {
-			kfree_skb(skb);
-			return NULL;
-		}
-		skb->dev = dev->master;
-	}
-
-	return dev;
-}
-
-
 static void net_tx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -2194,10 +2178,14 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->iif)
 		skb->iif = skb->dev->ifindex;
 
-	orig_dev = skb_bond(skb);
-
-	if (!orig_dev)
-		return NET_RX_DROP;
+	orig_dev = skb->dev;
+	if (orig_dev->master) {
+		if (skb_bond_should_drop(skb)) {
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+		skb->dev = orig_dev->master;
+	}
 
 	__get_cpu_var(netdev_rx_stat).total++;
--
cgit
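The open-coded test above, like the old skb_bond(), defers the policy
decision to skb_bond_should_drop(). Roughly (a hedged sketch of the
helper from include/linux/netdevice.h of this era, with the flag
handling simplified), a frame is dropped when its slave is inactive
unless the bonding mode needs to see it:

/* Hedged sketch of skb_bond_should_drop() from this era; simplified,
 * not verbatim.  Drop frames that arrived on an inactive bonding
 * slave, except for traffic the bonding mode itself must see.
 */
static inline int skb_bond_should_drop_sketch(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct net_device *master = dev->master;

	if (master && (dev->priv_flags & IFF_SLAVE_INACTIVE)) {
		/* ALB mode: unicast to an inactive slave is still valid. */
		if ((master->priv_flags & IFF_MASTER_ALB) &&
		    skb->pkt_type != PACKET_BROADCAST &&
		    skb->pkt_type != PACKET_MULTICAST)
			return 0;
		/* 802.3ad: LACPDUs (ETH_P_SLOW) must always get through. */
		if ((master->priv_flags & IFF_MASTER_8023AD) &&
		    skb->protocol == htons(ETH_P_SLOW))
			return 0;
		return 1;
	}
	return 0;
}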
From 0d7a3681232f545c6a59f77e60f7667673ef0e93 Mon Sep 17 00:00:00 2001
From: Joe Eykholt
Date: Wed, 2 Jul 2008 18:22:01 -0700
Subject: net/core: Allow certain receives on inactive slave.

Allow a packet_type that specifies an exact device to receive frames
even on an inactive bonding slave device.  This is important for some
L2 protocols, such as LLDP and FCoE.  This can eventually be used for
the bonding special cases as well.

Signed-off-by: Joe Eykholt
Signed-off-by: Jay Vosburgh
Signed-off-by: Jeff Garzik
---
 net/core/dev.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4a09833331f1..dab97c7cf275 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2165,6 +2165,7 @@ int netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
+	struct net_device *null_or_orig;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -2178,13 +2179,13 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->iif)
 		skb->iif = skb->dev->ifindex;
 
+	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	if (orig_dev->master) {
-		if (skb_bond_should_drop(skb)) {
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-		skb->dev = orig_dev->master;
+		if (skb_bond_should_drop(skb))
+			null_or_orig = orig_dev; /* deliver only exact match */
+		else
+			skb->dev = orig_dev->master;
 	}
 
 	__get_cpu_var(netdev_rx_stat).total++;
@@ -2209,7 +2210,7 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (!ptype->dev || ptype->dev == skb->dev) {
+		if (ptype->dev == null_or_orig || ptype->dev == skb->dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2234,7 +2235,7 @@ ncls:
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
-		    (!ptype->dev || ptype->dev == skb->dev)) {
+		    (ptype->dev == null_or_orig || ptype->dev == skb->dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
--
cgit
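The null_or_orig trick above is compact but subtle. Normally
null_or_orig stays NULL, so the first test matches wildcard handlers
(ptype->dev == NULL); in the should-drop case it is set to the slave
device, which both disables the wildcard match and enables an exact
match on the slave. A hypothetical helper (not in the kernel tree)
that spells the predicate out:

/* Hypothetical helper, not in the kernel tree: the delivery test that
 * netif_receive_skb() open-codes after this patch.
 */
static bool ptype_wants_skb(const struct packet_type *ptype,
			    const struct sk_buff *skb,
			    const struct net_device *null_or_orig)
{
	/* null_or_orig == NULL  => wildcard handlers match too;
	 * null_or_orig == slave => only handlers bound to that slave.
	 */
	return ptype->dev == null_or_orig || ptype->dev == skb->dev;
}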
From f982307f22db96201e41540295f24e8dcc10c78f Mon Sep 17 00:00:00 2001
From: Joe Eykholt
Date: Wed, 2 Jul 2008 18:22:02 -0700
Subject: net/core: Allow receive on active slaves.

If a packet_type specifies an active slave to bonding (and not just
any interface), allow it to receive frames that came in on that
interface.

Signed-off-by: Joe Eykholt
Signed-off-by: Jay Vosburgh
Signed-off-by: Jeff Garzik
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index dab97c7cf275..600bb23c4c2e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2210,7 +2210,8 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (ptype->dev == null_or_orig || ptype->dev == skb->dev) {
+		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+		    ptype->dev == orig_dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -2235,7 +2236,8 @@ ncls:
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
-		    (ptype->dev == null_or_orig || ptype->dev == skb->dev)) {
+		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
--
cgit
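An exact-match handler of the kind these last two patches cater to
would be registered along the following lines (a hypothetical sketch:
the function names and the raw 0x8906 FCoE ethertype are illustrative,
not taken from the patches above):

/* Hypothetical registration sketch for an exact-device packet_type,
 * the pattern used by FCoE/LLDP-style L2 protocols.  With the two
 * patches above, this handler sees frames from its bound interface
 * even when that interface is a bonding slave.
 */
static int fcoe_rcv_sketch(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *ptype,
			   struct net_device *orig_dev)
{
	/* Real protocol processing would go here. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type fcoe_ptype_sketch = {
	.type = __constant_htons(0x8906),	/* FCoE ethertype */
	.func = fcoe_rcv_sketch,
};

static void fcoe_attach_sketch(struct net_device *netdev)
{
	/* Binding .dev makes this an exact-match handler: frames are
	 * delivered only for this device, even on a bonding slave.
	 */
	fcoe_ptype_sketch.dev = netdev;
	dev_add_pack(&fcoe_ptype_sketch);
}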