summaryrefslogtreecommitdiff
path: root/drivers/vhost/net.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 20:01:30 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 20:01:30 -0800
commitc5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch)
tree9830baf38832769e1cf621708889111bbe3c93df /drivers/vhost/net.c
parent29afc4e9a408f2304e09c6dd0dbcfbd2356d0faa (diff)
parent9399f0c51489ae8c16d6559b82a452fdc1895e91 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) More iov_iter conversion work from Al Viro. [ The "crypto: switch af_alg_make_sg() to iov_iter" commit was wrong, and this pull actually adds an extra commit on top of the branch I'm pulling to fix that up, so that the pre-merge state is ok. - Linus ] 2) Various optimizations to the ipv4 forwarding information base trie lookup implementation. From Alexander Duyck. 3) Remove sock_iocb altogether, from CHristoph Hellwig. 4) Allow congestion control algorithm selection via routing metrics. From Daniel Borkmann. 5) Make ipv4 uncached route list per-cpu, from Eric Dumazet. 6) Handle rfs hash collisions more gracefully, also from Eric Dumazet. 7) Add xmit_more support to r8169, e1000, and e1000e drivers. From Florian Westphal. 8) Transparent Ethernet Bridging support for GRO, from Jesse Gross. 9) Add BPF packet actions to packet scheduler, from Jiri Pirko. 10) Add support for uniqu flow IDs to openvswitch, from Joe Stringer. 11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman Kwok. 12) More sanely handle out-of-window dupacks, which can result in serious ACK storms. From Neal Cardwell. 13) Various rhashtable bug fixes and enhancements, from Herbert Xu, Patrick McHardy, and Thomas Graf. 14) Support xmit_more in be2net, from Sathya Perla. 15) Group Policy extensions for vxlan, from Thomas Graf. 16) Remove Checksum Offload support for vxlan, from Tom Herbert. 17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From Vlad Yasevich. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits) crypto: fix af_alg_make_sg() conversion to iov_iter ipv4: Namespecify TCP PMTU mechanism i40e: Fix for stats init function call in Rx setup tcp: don't include Fast Open option in SYN-ACK on pure SYN-data openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set ipv6: Make __ipv6_select_ident static ipv6: Fix fragment id assignment on LE arches. bridge: Fix inability to add non-vlan fdb entry net: Mellanox: Delete unnecessary checks before the function call "vunmap" cxgb4: Add support in cxgb4 to get expansion rom version via ethtool ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version net: dsa: Remove redundant phy_attach() IB/mlx4: Reset flow support for IB kernel ULPs IB/mlx4: Always use the correct port for mirrored multicast attachments net/bonding: Fix potential bad memory access during bonding events tipc: remove tipc_snprintf tipc: nl compat add noop and remove legacy nl framework tipc: convert legacy nl stats show to nl compat tipc: convert legacy nl net id get to nl compat tipc: convert legacy nl net id set to nl compat ...
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r--drivers/vhost/net.c83
1 files changed, 23 insertions, 60 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9484d5652ca5..8dccca9013ed 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -84,10 +84,6 @@ struct vhost_net_ubuf_ref {
struct vhost_net_virtqueue {
struct vhost_virtqueue vq;
- /* hdr is used to store the virtio header.
- * Since each iovec has >= 1 byte length, we never need more than
- * header length entries to store the header. */
- struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
size_t vhost_hlen;
size_t sock_hlen;
/* vhost zerocopy support fields below: */
@@ -235,44 +231,6 @@ static bool vhost_sock_zcopy(struct socket *sock)
sock_flag(sock->sk, SOCK_ZEROCOPY);
}
-/* Pop first len bytes from iovec. Return number of segments used. */
-static int move_iovec_hdr(struct iovec *from, struct iovec *to,
- size_t len, int iov_count)
-{
- int seg = 0;
- size_t size;
-
- while (len && seg < iov_count) {
- size = min(from->iov_len, len);
- to->iov_base = from->iov_base;
- to->iov_len = size;
- from->iov_len -= size;
- from->iov_base += size;
- len -= size;
- ++from;
- ++to;
- ++seg;
- }
- return seg;
-}
-/* Copy iovec entries for len bytes from iovec. */
-static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
- size_t len, int iovcount)
-{
- int seg = 0;
- size_t size;
-
- while (len && seg < iovcount) {
- size = min(from->iov_len, len);
- to->iov_base = from->iov_base;
- to->iov_len = size;
- len -= size;
- ++from;
- ++to;
- ++seg;
- }
-}
-
/* In case of DMA done not in order in lower device driver for some reason.
* upend_idx is used to track end of used idx, done_idx is used to track head
* of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -336,7 +294,7 @@ static void handle_tx(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
- unsigned out, in, s;
+ unsigned out, in;
int head;
struct msghdr msg = {
.msg_name = NULL,
@@ -395,16 +353,17 @@ static void handle_tx(struct vhost_net *net)
break;
}
/* Skip header. TODO: support TSO. */
- s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
len = iov_length(vq->iov, out);
iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
+ iov_iter_advance(&msg.msg_iter, hdr_size);
/* Sanity check */
- if (!len) {
+ if (!iov_iter_count(&msg.msg_iter)) {
vq_err(vq, "Unexpected header len for TX: "
"%zd expected %zd\n",
- iov_length(nvq->hdr, s), hdr_size);
+ len, hdr_size);
break;
}
+ len = iov_iter_count(&msg.msg_iter);
zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
&& (nvq->upend_idx + 1) % UIO_MAXIOV !=
@@ -469,7 +428,7 @@ static int peek_head_len(struct sock *sk)
head = skb_peek(&sk->sk_receive_queue);
if (likely(head)) {
len = head->len;
- if (vlan_tx_tag_present(head))
+ if (skb_vlan_tag_present(head))
len += VLAN_HLEN;
}
@@ -579,6 +538,7 @@ static void handle_rx(struct vhost_net *net)
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
struct socket *sock;
+ struct iov_iter fixup;
mutex_lock(&vq->mutex);
sock = vq->private_data;
@@ -623,14 +583,19 @@ static void handle_rx(struct vhost_net *net)
break;
}
/* We don't need to be notified again. */
- if (unlikely((vhost_hlen)))
- /* Skip header. TODO: support TSO. */
- move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
- else
- /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
- * needed because recvmsg can modify msg_iov. */
- copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
- iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len);
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+ fixup = msg.msg_iter;
+ if (unlikely((vhost_hlen))) {
+ /* We will supply the header ourselves
+ * TODO: support TSO.
+ */
+ iov_iter_advance(&msg.msg_iter, vhost_hlen);
+ } else {
+ /* It'll come from socket; we'll need to patch
+ * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF
+ */
+ iov_iter_advance(&fixup, sizeof(hdr));
+ }
err = sock->ops->recvmsg(NULL, sock, &msg,
sock_len, MSG_DONTWAIT | MSG_TRUNC);
/* Userspace might have consumed the packet meanwhile:
@@ -642,9 +607,9 @@ static void handle_rx(struct vhost_net *net)
vhost_discard_vq_desc(vq, headcount);
continue;
}
+ /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
if (unlikely(vhost_hlen) &&
- memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
- vhost_hlen)) {
+ copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) {
vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
vq->iov->iov_base);
break;
@@ -653,9 +618,7 @@ static void handle_rx(struct vhost_net *net)
hdr.num_buffers = cpu_to_vhost16(vq, headcount);
if (likely(mergeable) &&
- memcpy_toiovecend(nvq->hdr, (void *)&hdr.num_buffers,
- offsetof(typeof(hdr), num_buffers),
- sizeof hdr.num_buffers)) {
+ copy_to_iter(&hdr.num_buffers, 2, &fixup) != 2) {
vq_err(vq, "Failed num_buffers write");
vhost_discard_vq_desc(vq, headcount);
break;