Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 203
1 file changed, 145 insertions(+), 58 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..5d674eb9a0f2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 	return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
 }
 
+static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
+			       unsigned int len)
+{
+	unsigned int headroom, tailroom, room, truesize;
+
+	truesize = mergeable_ctx_to_truesize(mrg_ctx);
+	headroom = mergeable_ctx_to_headroom(mrg_ctx);
+	tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	room = SKB_DATA_ALIGN(headroom + tailroom);
+
+	if (len > truesize - room) {
+		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+			 dev->name, len, (unsigned long)(truesize - room));
+		DEV_STATS_INC(dev, rx_length_errors);
+		return -1;
+	}
+
+	return 0;
+}
+
 static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
 					 unsigned int headroom,
 					 unsigned int len)
@@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi,
 	 * Since most packets only take 1 or 2 ring slots, stopping the queue
 	 * early means 16 slots are typically wasted.
 	 */
-	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+	if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 
 		netif_tx_stop_queue(txq);
@@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
 	} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 		/* More just got used, free them then recheck. */
 		free_old_xmit(sq, txq, false);
-		if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+		if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
 			netif_start_subqueue(dev, qnum);
 			u64_stats_update_begin(&sq->stats.syncp);
 			u64_stats_inc(&sq->stats.wake);
@@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
 	}
 }
 
+/* Note that @len is the length of received data without virtio header */
 static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
-				   struct receive_queue *rq, void *buf, u32 len)
+				   struct receive_queue *rq, void *buf,
+				   u32 len, bool first_buf)
 {
 	struct xdp_buff *xdp;
 	u32 bufsize;
 
 	xdp = (struct xdp_buff *)buf;
 
-	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
+	/* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
+	 * virtio header and ask the vhost to fill data from
+	 *         hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
+	 * The first buffer has virtio header so the remaining region for frame
+	 * data is
+	 *         xsk_pool_get_rx_frame_size()
+	 * While other buffers than the first one do not have virtio header, so
+	 * the maximum frame data's length can be
+	 *         xsk_pool_get_rx_frame_size() + vi->hdr_len
+	 */
+	bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
+	if (!first_buf)
+		bufsize += vi->hdr_len;
 
 	if (unlikely(len > bufsize)) {
 		pr_debug("%s: rx error: len %u exceeds truesize %u\n",
@@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi,
 
 		u64_stats_add(&stats->bytes, len);
 
-		xdp = buf_to_xdp(vi, rq, buf, len);
+		xdp = buf_to_xdp(vi, rq, buf, len, false);
 		if (!xdp)
 			goto err;
 
@@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu
 
 	u64_stats_add(&stats->bytes, len);
 
-	xdp = buf_to_xdp(vi, rq, buf, len);
+	xdp = buf_to_xdp(vi, rq, buf, len, true);
 	if (!xdp)
 		return;
 
@@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  * across multiple buffers (num_buf > 1), and we make sure buffers
  * have enough headroom.
  */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
+static struct page *xdp_linearize_page(struct net_device *dev,
+				       struct receive_queue *rq,
 				       int *num_buf,
 				       struct page *p,
 				       int offset,
@@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
 	page_off += *len;
 
+	/* Only mergeable mode can go inside this while loop. In small mode,
+	 * *num_buf == 1, so it cannot go inside.
+	 */
 	while (--*num_buf) {
 		unsigned int buflen;
 		void *buf;
+		void *ctx;
 		int off;
 
-		buf = virtnet_rq_get_buf(rq, &buflen, NULL);
+		buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
 		if (unlikely(!buf))
 			goto err_buf;
 
 		p = virt_to_head_page(buf);
 		off = buf - page_address(p);
 
+		if (check_mergeable_len(dev, ctx, buflen)) {
+			put_page(p);
+			goto err_buf;
+		}
+
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
@@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
 		headroom = vi->hdr_len + header_offset;
 		buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		xdp_page = xdp_linearize_page(rq, &num_buf, page,
+		xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
 					      offset, header_offset,
 					      &tlen);
 		if (!xdp_page)
@@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
 				      struct virtnet_rq_stats *stats)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
-	unsigned int headroom, tailroom, room;
-	unsigned int truesize, cur_frag_size;
 	struct skb_shared_info *shinfo;
 	unsigned int xdp_frags_truesz = 0;
+	unsigned int truesize;
 	struct page *page;
 	skb_frag_t *frag;
 	int offset;
@@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
 		page = virt_to_head_page(buf);
 		offset = buf - page_address(page);
 
-		truesize = mergeable_ctx_to_truesize(ctx);
-		headroom = mergeable_ctx_to_headroom(ctx);
-		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
-		room = SKB_DATA_ALIGN(headroom + tailroom);
-
-		cur_frag_size = truesize;
-		xdp_frags_truesz += cur_frag_size;
-		if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
+		if (check_mergeable_len(dev, ctx, len)) {
 			put_page(page);
-			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-				 dev->name, len, (unsigned long)(truesize - room));
-			DEV_STATS_INC(dev, rx_length_errors);
 			goto err;
 		}
 
+		truesize = mergeable_ctx_to_truesize(ctx);
+		xdp_frags_truesz += truesize;
+
 		frag = &shinfo->frags[shinfo->nr_frags++];
 		skb_frag_fill_page_desc(frag, page, offset, len);
 		if (page_is_pfmemalloc(page))
@@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 		 */
 		if (!xdp_prog->aux->xdp_has_frags) {
 			/* linearize data for XDP */
-			xdp_page = xdp_linearize_page(rq, num_buf,
+			xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
 						      *page, offset,
 						      XDP_PACKET_HEADROOM,
 						      len);
@@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct sk_buff *head_skb, *curr_skb;
 	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
-	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 
 	head_skb = NULL;
 	u64_stats_add(&stats->bytes, len - vi->hdr_len);
 
-	if (unlikely(len > truesize - room)) {
-		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-			 dev->name, len, (unsigned long)(truesize - room));
-		DEV_STATS_INC(dev, rx_length_errors);
+	if (check_mergeable_len(dev, ctx, len))
 		goto err_skb;
-	}
 
 	if (unlikely(vi->xdp_enabled)) {
 		struct bpf_prog *xdp_prog;
@@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		u64_stats_add(&stats->bytes, len);
 		page = virt_to_head_page(buf);
 
-		truesize = mergeable_ctx_to_truesize(ctx);
-		headroom = mergeable_ctx_to_headroom(ctx);
-		tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
-		room = SKB_DATA_ALIGN(headroom + tailroom);
-		if (unlikely(len > truesize - room)) {
-			pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-				 dev->name, len, (unsigned long)(truesize - room));
-			DEV_STATS_INC(dev, rx_length_errors);
+		if (check_mergeable_len(dev, ctx, len))
 			goto err_skb;
-		}
 
+		truesize = mergeable_ctx_to_truesize(ctx);
 		curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
 						   buf, len, truesize);
 		if (!curr_skb)
@@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
 			free_old_xmit(sq, txq, !!budget);
 		} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
 
-		if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+		if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
 			if (netif_tx_queue_stopped(txq)) {
 				u64_stats_update_begin(&sq->stats.syncp);
 				u64_stats_inc(&sq->stats.wake);
@@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 	else
 		free_old_xmit(sq, txq, !!budget);
 
-	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+	if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
 		if (netif_tx_queue_stopped(txq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			u64_stats_inc(&sq->stats.wake);
@@ -3342,7 +3365,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
+static void __virtnet_rx_pause(struct virtnet_info *vi,
+			       struct receive_queue *rq)
 {
 	bool running = netif_running(vi->dev);
 
@@ -3352,15 +3376,64 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
 	}
 }
 
-static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
+static void virtnet_rx_pause_all(struct virtnet_info *vi)
 {
-	bool running = netif_running(vi->dev);
+	int i;
 
-	if (!try_fill_recv(vi, rq, GFP_KERNEL))
-		schedule_delayed_work(&vi->refill, 0);
+	/*
+	 * Make sure refill_work does not run concurrently to
+	 * avoid napi_disable race which leads to deadlock.
+	 */
+	disable_delayed_refill(vi);
+	cancel_delayed_work_sync(&vi->refill);
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		__virtnet_rx_pause(vi, &vi->rq[i]);
+}
+
+static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
+{
+	/*
+	 * Make sure refill_work does not run concurrently to
+	 * avoid napi_disable race which leads to deadlock.
+	 */
+	disable_delayed_refill(vi);
+	cancel_delayed_work_sync(&vi->refill);
+	__virtnet_rx_pause(vi, rq);
+}
+
+static void __virtnet_rx_resume(struct virtnet_info *vi,
+				struct receive_queue *rq,
+				bool refill)
+{
+	bool running = netif_running(vi->dev);
+	bool schedule_refill = false;
 
+	if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
+		schedule_refill = true;
 	if (running)
 		virtnet_napi_enable(rq);
+
+	if (schedule_refill)
+		schedule_delayed_work(&vi->refill, 0);
+}
+
+static void virtnet_rx_resume_all(struct virtnet_info *vi)
+{
+	int i;
+
+	enable_delayed_refill(vi);
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (i < vi->curr_queue_pairs)
+			__virtnet_rx_resume(vi, &vi->rq[i], true);
+		else
+			__virtnet_rx_resume(vi, &vi->rq[i], false);
+	}
+}
+
+static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
+{
+	enable_delayed_refill(vi);
+	__virtnet_rx_resume(vi, rq, true);
 }
 
 static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3431,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
 {
 	int qindex, err;
 
+	if (ring_num <= MAX_SKB_FRAGS + 2) {
+		netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n",
+			   ring_num, MAX_SKB_FRAGS + 2);
+		return -EINVAL;
+	}
+
 	qindex = sq - vi->sq;
 
 	virtnet_tx_pause(vi, sq);
@@ -3681,8 +3760,10 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 succ:
 	vi->curr_queue_pairs = queue_pairs;
 	/* virtnet_open() will refill when device is going to up. */
-	if (dev->flags & IFF_UP)
+	spin_lock_bh(&vi->refill_lock);
+	if (dev->flags & IFF_UP && vi->refill_enabled)
 		schedule_delayed_work(&vi->refill, 0);
+	spin_unlock_bh(&vi->refill_lock);
 
 	return 0;
 }
@@ -5626,6 +5707,10 @@ static void virtnet_get_base_stats(struct net_device *dev,
 
 	if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED)
 		tx->hw_drop_ratelimits = 0;
+
+	netdev_stat_queue_sum(dev,
+			      dev->real_num_rx_queues, vi->max_queue_pairs, rx,
+			      dev->real_num_tx_queues, vi->max_queue_pairs, tx);
 }
 
 static const struct netdev_stat_ops virtnet_stat_ops = {
@@ -5838,8 +5923,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
 
 	hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
 						 DMA_TO_DEVICE, 0);
-	if (virtqueue_dma_mapping_error(sq->vq, hdr_dma))
-		return -ENOMEM;
+	if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) {
+		err = -ENOMEM;
+		goto err_free_buffs;
+	}
 
 	err = xsk_pool_dma_map(pool, dma_dev, 0);
 	if (err)
@@ -5867,6 +5954,8 @@ err_rq:
 err_xsk_map:
 	virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
 					 DMA_TO_DEVICE, 0);
+err_free_buffs:
+	kvfree(rq->xsk_buffs);
 	return err;
 }
 
@@ -5959,12 +6048,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	if (prog)
 		bpf_prog_add(prog, vi->max_queue_pairs - 1);
 
+	virtnet_rx_pause_all(vi);
+
 	/* Make sure NAPI is not using any XDP TX queues for RX. */
 	if (netif_running(dev)) {
-		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtnet_napi_disable(&vi->rq[i]);
+		for (i = 0; i < vi->max_queue_pairs; i++)
 			virtnet_napi_tx_disable(&vi->sq[i]);
-		}
 	}
 
 	if (!prog) {
@@ -5996,13 +6085,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 		vi->xdp_enabled = false;
 	}
 
+	virtnet_rx_resume_all(vi);
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		if (old_prog)
 			bpf_prog_put(old_prog);
-		if (netif_running(dev)) {
-			virtnet_napi_enable(&vi->rq[i]);
+		if (netif_running(dev))
 			virtnet_napi_tx_enable(&vi->sq[i]);
-		}
 	}
 
 	return 0;
@@ -6014,11 +6102,10 @@ err:
 		rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
 	}
 
+	virtnet_rx_resume_all(vi);
 	if (netif_running(dev)) {
-		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtnet_napi_enable(&vi->rq[i]);
+		for (i = 0; i < vi->max_queue_pairs; i++)
 			virtnet_napi_tx_enable(&vi->sq[i]);
-		}
 	}
 	if (prog)
 		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
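
Note on the length check consolidated above: the repeated truesize/headroom/tailroom arithmetic in receive_mergeable(), virtnet_build_xdp_buff_mrg() and xdp_linearize_page() is what the new check_mergeable_len() helper centralizes. The following is only an illustrative user-space sketch of that bound, not kernel code: SKB_DATA_ALIGN is modeled with an assumed 64-byte alignment and SHINFO_SIZE is a placeholder for sizeof(struct skb_shared_info).

    #include <stdio.h>

    /* Stand-in for the kernel's SKB_DATA_ALIGN (alignment assumed 64 here). */
    #define CACHELINE          64
    #define ALIGN_UP(x, a)     (((x) + (a) - 1) & ~((a) - 1))
    #define SKB_DATA_ALIGN(x)  ALIGN_UP((x), CACHELINE)
    /* Placeholder for sizeof(struct skb_shared_info). */
    #define SHINFO_SIZE        320

    /* Mirrors the bound check_mergeable_len() enforces: the received length
     * must not exceed the buffer's truesize minus the space reserved for
     * headroom and (when headroom is present) the shared-info tailroom.
     * Returns 0 if the length fits, -1 otherwise. */
    static int check_len(unsigned int len, unsigned int truesize,
                         unsigned int headroom)
    {
            unsigned int tailroom = headroom ? SHINFO_SIZE : 0;
            unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);

            if (len > truesize - room) {
                    fprintf(stderr, "rx error: len %u exceeds truesize %u\n",
                            len, truesize - room);
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            /* e.g. a 4096-byte buffer with 256 bytes of XDP headroom:
             * room = ALIGN(256 + 320) = 576, so at most 3520 bytes fit. */
            printf("%d\n", check_len(3000, 4096, 256));  /* 0: fits      */
            printf("%d\n", check_len(3700, 4096, 256));  /* -1: too long */
            return 0;
    }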