summaryrefslogtreecommitdiff
path: root/drivers/net/ovpn/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ovpn/tcp.c')
-rw-r--r--drivers/net/ovpn/tcp.c598
1 files changed, 598 insertions, 0 deletions
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
new file mode 100644
index 000000000000..7c42d84987ad
--- /dev/null
+++ b/drivers/net/ovpn/tcp.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/skbuff.h>
+#include <net/hotdata.h>
+#include <net/inet_common.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/transp_v6.h>
+#include <net/route.h>
+#include <trace/events/sock.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "io.h"
+#include "peer.h"
+#include "proto.h"
+#include "skb.h"
+#include "tcp.h"
+
+#define OVPN_TCP_DEPTH_NESTING 2
+#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
+#error "OVPN TCP requires its own lockdep subclass"
+#endif
+
+static struct proto ovpn_tcp_prot __ro_after_init;
+static struct proto_ops ovpn_tcp_ops __ro_after_init;
+static struct proto ovpn_tcp6_prot __ro_after_init;
+static struct proto_ops ovpn_tcp6_ops __ro_after_init;
+
+static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+ struct strp_msg *rxm = strp_msg(skb);
+ __be16 blen;
+ u16 len;
+ int err;
+
+ /* when packets are written to the TCP stream, they are prepended with
+ * two bytes indicating the actual packet size.
+ * Parse accordingly and return the actual size (including the size
+ * header)
+ */
+
+ if (skb->len < rxm->offset + 2)
+ return 0;
+
+ err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
+ if (err < 0)
+ return err;
+
+ len = be16_to_cpu(blen);
+ if (len < 2)
+ return -EINVAL;
+
+ return len + 2;
+}
+
+/* queue skb for sending to userspace via recvmsg on the socket */
+static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb)
+{
+ skb_set_owner_r(skb, sk);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ skb_queue_tail(&peer->tcp.user_queue, skb);
+ peer->tcp.sk_cb.sk_data_ready(sk);
+}
+
+static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
+{
+ struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
+ struct strp_msg *msg = strp_msg(skb);
+ size_t pkt_len = msg->full_len - 2;
+ size_t off = msg->offset + 2;
+ u8 opcode;
+
+ /* ensure skb->data points to the beginning of the openvpn packet */
+ if (!pskb_pull(skb, off)) {
+ net_warn_ratelimited("%s: packet too small for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* strparser does not trim the skb for us, therefore we do it now */
+ if (pskb_trim(skb, pkt_len) != 0) {
+ net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* we need the first 4 bytes of data to be accessible
+ * to extract the opcode and the key ID later on
+ */
+ if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
+ net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* DATA_V2 packets are handled in kernel, the rest goes to user space */
+ opcode = ovpn_opcode_from_skb(skb, 0);
+ if (unlikely(opcode != OVPN_DATA_V2)) {
+ if (opcode == OVPN_DATA_V1) {
+ net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
+ netdev_name(peer->ovpn->dev));
+ goto err;
+ }
+
+ /* The packet size header must be there when sending the packet
+ * to userspace, therefore we put it back
+ */
+ skb_push(skb, 2);
+ ovpn_tcp_to_userspace(peer, strp->sk, skb);
+ return;
+ }
+
+ /* hold reference to peer as required by ovpn_recv().
+ *
+ * NOTE: in this context we should already be holding a reference to
+ * this peer, therefore ovpn_peer_hold() is not expected to fail
+ */
+ if (WARN_ON(!ovpn_peer_hold(peer)))
+ goto err;
+
+ ovpn_recv(peer, skb);
+ return;
+err:
+ dev_dstats_rx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
+}
+
+static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int flags, int *addr_len)
+{
+ int err = 0, off, copied = 0, ret;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
+ rcu_read_unlock();
+ return -EBADF;
+ }
+ peer = sock->peer;
+ rcu_read_unlock();
+
+ skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
+ if (!skb) {
+ if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
+ ret = 0;
+ goto out;
+ }
+ ret = err;
+ goto out;
+ }
+
+ copied = len;
+ if (copied > skb->len)
+ copied = skb->len;
+ else if (copied < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ ret = err;
+ goto out;
+ }
+
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
+ kfree_skb(skb);
+ ret = copied;
+out:
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
+{
+ struct ovpn_peer *peer = ovpn_sock->peer;
+ struct socket *sock = ovpn_sock->sock;
+
+ strp_stop(&peer->tcp.strp);
+ skb_queue_purge(&peer->tcp.user_queue);
+
+ /* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
+ sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
+ sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
+ sock->sk->sk_prot = peer->tcp.sk_cb.prot;
+ sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+
+ rcu_assign_sk_user_data(sock->sk, NULL);
+}
+
+void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
+{
+ struct ovpn_peer *peer = sock->peer;
+
+ /* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
+ * either the worker is not running or this function
+ * was invoked by that worker.
+ */
+
+ cancel_work_sync(&sock->tcp_tx_work);
+ strp_done(&peer->tcp.strp);
+
+ skb_queue_purge(&peer->tcp.out_queue);
+ kfree_skb(peer->tcp.out_msg.skb);
+ peer->tcp.out_msg.skb = NULL;
+}
+
+static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
+{
+ struct sk_buff *skb = peer->tcp.out_msg.skb;
+ int ret, flags;
+
+ if (!skb)
+ return;
+
+ if (peer->tcp.tx_in_progress)
+ return;
+
+ peer->tcp.tx_in_progress = true;
+
+ do {
+ flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
+ ret = skb_send_sock_locked_with_flags(sk, skb,
+ peer->tcp.out_msg.offset,
+ peer->tcp.out_msg.len,
+ flags);
+ if (unlikely(ret < 0)) {
+ if (ret == -EAGAIN)
+ goto out;
+
+ net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, ret);
+
+ /* in case of TCP error we can't recover the VPN
+ * stream therefore we abort the connection
+ */
+ ovpn_peer_hold(peer);
+ schedule_work(&peer->tcp.defer_del_work);
+
+ /* we bail out immediately and keep tx_in_progress set
+ * to true. This way we prevent more TX attempts
+ * which would lead to more invocations of
+ * schedule_work()
+ */
+ return;
+ }
+
+ peer->tcp.out_msg.len -= ret;
+ peer->tcp.out_msg.offset += ret;
+ } while (peer->tcp.out_msg.len > 0);
+
+ if (!peer->tcp.out_msg.len) {
+ preempt_disable();
+ dev_dstats_tx_add(peer->ovpn->dev, skb->len);
+ preempt_enable();
+ }
+
+ kfree_skb(peer->tcp.out_msg.skb);
+ peer->tcp.out_msg.skb = NULL;
+ peer->tcp.out_msg.len = 0;
+ peer->tcp.out_msg.offset = 0;
+
+out:
+ peer->tcp.tx_in_progress = false;
+}
+
+void ovpn_tcp_tx_work(struct work_struct *work)
+{
+ struct ovpn_socket *sock;
+
+ sock = container_of(work, struct ovpn_socket, tcp_tx_work);
+
+ lock_sock(sock->sock->sk);
+ if (sock->peer)
+ ovpn_tcp_send_sock(sock->peer, sock->sock->sk);
+ release_sock(sock->sock->sk);
+}
+
+static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (peer->tcp.out_msg.skb)
+ ovpn_tcp_send_sock(peer, sk);
+
+ if (peer->tcp.out_msg.skb) {
+ dev_dstats_tx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ return;
+ }
+
+ peer->tcp.out_msg.skb = skb;
+ peer->tcp.out_msg.len = skb->len;
+ peer->tcp.out_msg.offset = 0;
+ ovpn_tcp_send_sock(peer, sk);
+}
+
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+ struct sk_buff *skb)
+{
+ u16 len = skb->len;
+
+ *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
+
+ spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
+ if (sock_owned_by_user(sock->sk)) {
+ if (skb_queue_len(&peer->tcp.out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog)) {
+ dev_dstats_tx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ goto unlock;
+ }
+ __skb_queue_tail(&peer->tcp.out_queue, skb);
+ } else {
+ ovpn_tcp_send_sock_skb(peer, sock->sk, skb);
+ }
+unlock:
+ spin_unlock(&sock->sk->sk_lock.slock);
+}
+
+static void ovpn_tcp_release(struct sock *sk)
+{
+ struct sk_buff_head queue;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (!sock) {
+ rcu_read_unlock();
+ return;
+ }
+
+ peer = sock->peer;
+
+ /* during initialization this function is called before
+ * assigning sock->peer
+ */
+ if (unlikely(!peer || !ovpn_peer_hold(peer))) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&peer->tcp.out_queue, &queue);
+
+ while ((skb = __skb_dequeue(&queue)))
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
+
+ peer->tcp.sk_cb.prot->release_cb(sk);
+ ovpn_peer_put(peer);
+}
+
+static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ struct ovpn_socket *sock;
+ int ret, linear = PAGE_SIZE;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ lock_sock(sk);
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
+ rcu_read_unlock();
+ release_sock(sk);
+ return -EIO;
+ }
+ rcu_read_unlock();
+ peer = sock->peer;
+
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
+ ret = -EOPNOTSUPP;
+ goto peer_free;
+ }
+
+ if (peer->tcp.out_msg.skb) {
+ ret = -EAGAIN;
+ goto peer_free;
+ }
+
+ if (size < linear)
+ linear = size;
+
+ skb = sock_alloc_send_pskb(sk, linear, size - linear,
+ msg->msg_flags & MSG_DONTWAIT, &ret, 0);
+ if (!skb) {
+ net_err_ratelimited("%s: skb alloc failed: %d\n",
+ netdev_name(peer->ovpn->dev), ret);
+ goto peer_free;
+ }
+
+ skb_put(skb, linear);
+ skb->len = size;
+ skb->data_len = size - linear;
+
+ ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (ret) {
+ kfree_skb(skb);
+ net_err_ratelimited("%s: skb copy from iter failed: %d\n",
+ netdev_name(peer->ovpn->dev), ret);
+ goto peer_free;
+ }
+
+ ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
+ ret = size;
+peer_free:
+ release_sock(sk);
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+static int ovpn_tcp_disconnect(struct sock *sk, int flags)
+{
+ return -EBUSY;
+}
+
+static void ovpn_tcp_data_ready(struct sock *sk)
+{
+ struct ovpn_socket *sock;
+
+ trace_sk_data_ready(sk);
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (likely(sock && sock->peer))
+ strp_data_ready(&sock->peer->tcp.strp);
+ rcu_read_unlock();
+}
+
+static void ovpn_tcp_write_space(struct sock *sk)
+{
+ struct ovpn_socket *sock;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (likely(sock && sock->peer)) {
+ schedule_work(&sock->tcp_tx_work);
+ sock->peer->tcp.sk_cb.sk_write_space(sk);
+ }
+ rcu_read_unlock();
+}
+
+static void ovpn_tcp_build_protos(struct proto *new_prot,
+ struct proto_ops *new_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops);
+
+static void ovpn_tcp_peer_del_work(struct work_struct *work)
+{
+ struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
+ tcp.defer_del_work);
+
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
+ ovpn_peer_put(peer);
+}
+
+/* Set TCP encapsulation callbacks */
+int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct ovpn_peer *peer)
+{
+ struct socket *sock = ovpn_sock->sock;
+ struct strp_callbacks cb = {
+ .rcv_msg = ovpn_tcp_rcv,
+ .parse_msg = ovpn_tcp_parse,
+ };
+ int ret;
+
+ /* make sure no pre-existing encapsulation handler exists */
+ if (sock->sk->sk_user_data)
+ return -EBUSY;
+
+ /* only a fully connected socket is expected. Connection should be
+ * handled in userspace
+ */
+ if (sock->sk->sk_state != TCP_ESTABLISHED) {
+ net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
+ netdev_name(peer->ovpn->dev),
+ sock->sk->sk_state);
+ return -EINVAL;
+ }
+
+ ret = strp_init(&peer->tcp.strp, sock->sk, &cb);
+ if (ret < 0) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return ret;
+ }
+
+ INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);
+
+ __sk_dst_reset(sock->sk);
+ skb_queue_head_init(&peer->tcp.user_queue);
+ skb_queue_head_init(&peer->tcp.out_queue);
+
+ /* save current CBs so that they can be restored upon socket release */
+ peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
+ peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
+ peer->tcp.sk_cb.prot = sock->sk->sk_prot;
+ peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops;
+
+ /* assign our static CBs and prot/ops */
+ sock->sk->sk_data_ready = ovpn_tcp_data_ready;
+ sock->sk->sk_write_space = ovpn_tcp_write_space;
+
+ if (sock->sk->sk_family == AF_INET) {
+ sock->sk->sk_prot = &ovpn_tcp_prot;
+ sock->sk->sk_socket->ops = &ovpn_tcp_ops;
+ } else {
+ sock->sk->sk_prot = &ovpn_tcp6_prot;
+ sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
+ }
+
+ /* avoid using task_frag */
+ sock->sk->sk_allocation = GFP_ATOMIC;
+ sock->sk->sk_use_task_frag = false;
+
+ /* enqueue the RX worker */
+ strp_check_rcv(&peer->tcp.strp);
+
+ return 0;
+}
+
+static void ovpn_tcp_close(struct sock *sk, long timeout)
+{
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
+ rcu_read_unlock();
+ return;
+ }
+ peer = sock->peer;
+ rcu_read_unlock();
+
+ ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
+ peer->tcp.sk_cb.prot->close(sk, timeout);
+ ovpn_peer_put(peer);
+}
+
+static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ __poll_t mask = datagram_poll(file, sock, wait);
+ struct ovpn_socket *ovpn_sock;
+
+ rcu_read_lock();
+ ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
+ if (ovpn_sock && ovpn_sock->peer &&
+ !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ rcu_read_unlock();
+
+ return mask;
+}
+
+static void ovpn_tcp_build_protos(struct proto *new_prot,
+ struct proto_ops *new_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops)
+{
+ memcpy(new_prot, orig_prot, sizeof(*new_prot));
+ memcpy(new_ops, orig_ops, sizeof(*new_ops));
+ new_prot->recvmsg = ovpn_tcp_recvmsg;
+ new_prot->sendmsg = ovpn_tcp_sendmsg;
+ new_prot->disconnect = ovpn_tcp_disconnect;
+ new_prot->close = ovpn_tcp_close;
+ new_prot->release_cb = ovpn_tcp_release;
+ new_ops->poll = ovpn_tcp_poll;
+}
+
+/* Initialize TCP static objects */
+void __init ovpn_tcp_init(void)
+{
+ ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
+ &inet_stream_ops);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
+ &inet6_stream_ops);
+#endif
+}