diff options
author | Alexei Starovoitov <ast@kernel.org> | 2018-05-24 18:36:16 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2018-05-24 18:36:16 -0700 |
commit | 10f678683e4026e43524b0492068a371d00fdeed (patch) | |
tree | 069715fbcf7b0f4b73103861fd0a111e143b5705 /drivers/net/virtio_net.c | |
parent | f80acbd233382619f597f785f8c238084dc62e21 (diff) | |
parent | a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff) |
Merge branch 'xdp_xmit-bulking'
Jesper Dangaard Brouer says:
====================
This patchset change ndo_xdp_xmit API to take a bulk of xdp frames.
When kernel is compiled with CONFIG_RETPOLINE, every indirect function
pointer (branch) call hurts performance. For XDP this have a huge
negative performance impact.
This patchset reduce the needed (indirect) calls to ndo_xdp_xmit, but
also prepares for further optimizations. The DMA APIs use of indirect
function pointer calls is the primary source the regression. It is
left for a followup patchset, to use bulking calls towards the DMA API
(via the scatter-gatter calls).
The other advantage of this API change is that drivers can easier
amortize the cost of any sync/locking scheme, over the bulk of
packets. The assumption of the current API is that the driver
implemementing the NDO will also allocate a dedicated XDP TX queue for
every CPU in the system. Which is not always possible or practical to
configure. E.g. ixgbe cannot load an XDP program on a machine with
more than 96 CPUs, due to limited hardware TX queues. E.g. virtio_net
is hard to configure as it requires manually increasing the
queues. E.g. tun driver chooses to use a per XDP frame producer lock
modulo smp_processor_id over avail queues.
I'm considered adding 'flags' to ndo_xdp_xmit, but it's not part of
this patchset. This will be a followup patchset, once we know if this
will be needed (e.g. for non-map xdp_redirect flush-flag, and if
AF_XDP chooses to use ndo_xdp_xmit for TX).
---
V5: Fixed up issues spotted by Daniel and John
V4: Splitout the patches from 4 to 8 patches. I cannot split the
driver changes from the NDO change, but I've tried to isolated the NDO
change together with the driver change as much as possible.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 66 |
1 files changed, 49 insertions, 17 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f34794a76c4d..39a0783d1cde 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev) virtqueue_kick(sq->vq); } -static int __virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_frame *xdpf) +static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, + struct send_queue *sq, + struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - struct xdp_frame *xdpf_sent; - struct send_queue *sq; - unsigned int len; - unsigned int qp; int err; - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - sq = &vi->sq[qp]; - - /* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); - /* virtqueue want to use data area in-front of packet */ if (unlikely(xdpf->metasize > 0)) return -EOPNOTSUPP; @@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi, return 0; } -static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi, + struct xdp_frame *xdpf) +{ + struct xdp_frame *xdpf_sent; + struct send_queue *sq; + unsigned int len; + unsigned int qp; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; + + /* Free up any pending old buffers before queueing new ones. */ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); + + return __virtnet_xdp_xmit_one(vi, sq, xdpf); +} + +static int virtnet_xdp_xmit(struct net_device *dev, + int n, struct xdp_frame **frames) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; + struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; + struct send_queue *sq; + unsigned int len; + unsigned int qp; + int drops = 0; + int err; + int i; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. @@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (!xdp_prog) return -ENXIO; - return __virtnet_xdp_xmit(vi, xdpf); + /* Free up any pending old buffers before queueing new ones. */ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + + err = __virtnet_xdp_xmit_one(vi, sq, xdpf); + if (err) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + return n - drops; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) @@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_xmit(vi, xdpf); + err = __virtnet_xdp_tx_xmit(vi, xdpf); if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; @@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_xmit(vi, xdpf); + err = __virtnet_xdp_tx_xmit(vi, xdpf); if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) |