author		Alexei Starovoitov <ast@kernel.org>	2018-05-24 18:36:16 -0700
committer	Alexei Starovoitov <ast@kernel.org>	2018-05-24 18:36:16 -0700
commit		10f678683e4026e43524b0492068a371d00fdeed (patch)
tree		069715fbcf7b0f4b73103861fd0a111e143b5705 /drivers/net/virtio_net.c
parent		f80acbd233382619f597f785f8c238084dc62e21 (diff)
parent		a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff)
Merge branch 'xdp_xmit-bulking'
Jesper Dangaard Brouer says:

====================
This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.

When the kernel is compiled with CONFIG_RETPOLINE, every indirect function pointer (branch) call hurts performance. For XDP this has a huge negative performance impact.

This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but also prepares for further optimizations. The DMA API's use of indirect function pointer calls is the primary source of the regression; it is left for a followup patchset to use bulking calls towards the DMA API (via the scatter-gather calls).

The other advantage of this API change is that drivers can more easily amortize the cost of any sync/locking scheme over the bulk of packets.

The assumption of the current API is that the driver implementing the NDO will also allocate a dedicated XDP TX queue for every CPU in the system, which is not always possible or practical to configure. E.g. ixgbe cannot load an XDP program on a machine with more than 96 CPUs, due to limited hardware TX queues. E.g. virtio_net is hard to configure as it requires manually increasing the queues. E.g. the tun driver chooses to take a producer lock per XDP frame, selecting the queue as smp_processor_id modulo the available queues.

I considered adding 'flags' to ndo_xdp_xmit, but it is not part of this patchset. This will be a followup patchset, once we know whether it will be needed (e.g. for a non-map xdp_redirect flush-flag, and if AF_XDP chooses to use ndo_xdp_xmit for TX).

---
V5: Fixed up issues spotted by Daniel and John.
V4: Split the patches out from 4 to 8 patches. I cannot split the driver changes from the NDO change, but I've tried to isolate the NDO change together with the driver change as much as possible.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
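[Editor's note] The shape of the NDO change, as a minimal sketch reconstructed from the virtio_net diff below; the exact declaration in include/linux/netdevice.h may differ in detail:

	/* Before: one indirect call per frame. */
	int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdpf);

	/* After: one indirect call per bulk of frames; returns the number
	 * of frames successfully queued (frames it drops are freed by the
	 * driver), or a negative errno if the device cannot transmit at
	 * all (e.g. no XDP program loaded). */
	int (*ndo_xdp_xmit)(struct net_device *dev, int n,
			    struct xdp_frame **frames);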
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--	drivers/net/virtio_net.c	66
1 file changed, 49 insertions(+), 17 deletions(-)
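[Editor's note] To illustrate why bulking helps, a caller of the new API might batch frames and pay the indirect-call (retpoline) cost once per flush. This is an illustrative sketch only, with hypothetical names ('xdp_bulk_queue', 'xdp_bq_flush', XDP_BULK_SIZE); the real redirect path lives in kernel/bpf/devmap.c and differs in detail:

	#define XDP_BULK_SIZE 16

	struct xdp_bulk_queue {
		struct xdp_frame *q[XDP_BULK_SIZE];
		int count;
	};

	static void xdp_bq_flush(struct net_device *dev,
				 struct xdp_bulk_queue *bq)
	{
		int sent;

		if (!bq->count)
			return;

		/* One indirect call covers the whole batch, so the
		 * CONFIG_RETPOLINE penalty is paid per flush, not per
		 * frame. */
		sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
		if (sent < 0) {
			int i;

			/* The driver rejected the whole bulk (e.g. the
			 * -ENXIO case in virtnet_xdp_xmit below); the
			 * caller still owns the frames and must return
			 * them. Frames the driver drops internally are
			 * freed by the driver itself. */
			for (i = 0; i < bq->count; i++)
				xdp_return_frame_rx_napi(bq->q[i]);
		}
		bq->count = 0;
	}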
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f34794a76c4d..39a0783d1cde 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq);
}
-static int __virtnet_xdp_xmit(struct virtnet_info *vi,
- struct xdp_frame *xdpf)
+static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+ struct send_queue *sq,
+ struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
- struct xdp_frame *xdpf_sent;
- struct send_queue *sq;
- unsigned int len;
- unsigned int qp;
int err;
- qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
- sq = &vi->sq[qp];
-
- /* Free up any pending old buffers before queueing new ones. */
- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
- xdp_return_frame(xdpf_sent);
-
/* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0;
}
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
+ struct xdp_frame *xdpf)
+{
+ struct xdp_frame *xdpf_sent;
+ struct send_queue *sq;
+ unsigned int len;
+ unsigned int qp;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
+
+ /* Free up any pending old buffers before queueing new ones. */
+ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+ xdp_return_frame(xdpf_sent);
+
+ return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+}
+
+static int virtnet_xdp_xmit(struct net_device *dev,
+ int n, struct xdp_frame **frames)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
+ struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
+ struct send_queue *sq;
+ unsigned int len;
+ unsigned int qp;
+ int drops = 0;
+ int err;
+ int i;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog)
return -ENXIO;
- return __virtnet_xdp_xmit(vi, xdpf);
+ /* Free up any pending old buffers before queueing new ones. */
+ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+ xdp_return_frame(xdpf_sent);
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+
+ err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
+ if (err) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+ return n - drops;
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
- err = __virtnet_xdp_xmit(vi, xdpf);
+ err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
- err = __virtnet_xdp_xmit(vi, xdpf);
+ err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page))