Diffstat (limited to 'net/mptcp')
-rw-r--r--  net/mptcp/fastopen.c     |  11
-rw-r--r--  net/mptcp/options.c      |  14
-rw-r--r--  net/mptcp/pm.c           |   4
-rw-r--r--  net/mptcp/pm_netlink.c   |   6
-rw-r--r--  net/mptcp/pm_userspace.c |   4
-rw-r--r--  net/mptcp/protocol.c     | 183
-rw-r--r--  net/mptcp/protocol.h     |  17
-rw-r--r--  net/mptcp/sockopt.c      |  46
-rw-r--r--  net/mptcp/subflow.c      | 147
9 files changed, 275 insertions(+), 157 deletions(-)
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index d237d142171c..bceaab8dd8e4 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -9,11 +9,18 @@
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req)
{
- struct sock *ssk = subflow->tcp_sock;
- struct sock *sk = subflow->conn;
+ struct sock *sk, *ssk;
struct sk_buff *skb;
struct tcp_sock *tp;
+ /* on early fallback the subflow context is deleted by
+ * subflow_syn_recv_sock()
+ */
+ if (!subflow)
+ return;
+
+ ssk = subflow->tcp_sock;
+ sk = subflow->conn;
tp = tcp_sk(ssk);
subflow->is_mptfo = 1;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b30cea2fbf3f..19a01b6566f1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -442,7 +442,6 @@ static void clear_3rdack_retransmission(struct sock *sk)
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -556,7 +555,6 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -580,7 +578,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy = *mpext;
}
- remaining -= map_size;
dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
@@ -851,9 +848,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
}
snd_data_fin = mptcp_data_fin_enabled(msk);
- if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) {
unsigned int mp_fail_size;
ret = true;
@@ -1001,7 +998,7 @@ check_notify:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk);
} else {
- mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
+ mptcp_pm_fully_established(msk, ssk);
}
return true;
@@ -1192,9 +1189,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
if (mp_opt.data_fin && mp_opt.data_len == 1 &&
- mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
- schedule_work(&msk->work))
- sock_hold(subflow->conn);
+ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
+ mptcp_schedule_work((struct sock *)msk);
return true;
}
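The conversions above replace the open-coded sock_hold()/schedule_work() pattern with mptcp_schedule_work(); the same change is applied in subflow.c further down. For reference, a minimal sketch of that pre-existing helper (it is not part of this diff):

bool mptcp_schedule_work(struct sock *sk)
{
	if (inet_sk_state_load(sk) != TCP_CLOSE &&
	    schedule_work(&mptcp_sk(sk)->work)) {
		/* the worker keeps sk alive: the reference is dropped
		 * once the work item has run
		 */
		sock_hold(sk);
		return true;
	}
	return false;
}

Skipping the hold entirely when the msk is already closed is what lets the call sites drop their conditional sock_put() paths.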
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 70f0ced3ca86..78c924506e83 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
@@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
if (announce)
- mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
+ mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5c8dea49626c..bc343dab5e3f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1035,8 +1035,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -2035,7 +2035,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
return -EMSGSIZE;
- sk_err = ssk->sk_err;
+ sk_err = READ_ONCE(ssk->sk_err);
if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
return -EMSGSIZE;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index a02d3cbf2a1b..27a275805c06 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -25,8 +25,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
}
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 60b23b2716c4..08dc53f56bc2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
-/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
- * completed yet or has failed, return the subflow socket.
- * Otherwise return NULL.
- */
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
-{
- if (!msk->subflow || READ_ONCE(msk->can_ack))
- return NULL;
-
- return msk->subflow;
-}
-
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
{
@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
+/* If the MPC handshake has not started, return the first subflow socket,
+ * allocating it if needed.
+ */
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ return ERR_PTR(-EINVAL);
+
+ if (!msk->subflow) {
+ if (msk->first)
+ return ERR_PTR(-EINVAL);
+
+ ret = __mptcp_socket_create(msk);
+ if (ret)
+ return ERR_PTR(ret);
+
+ mptcp_sockopt_sync(msk, msk->first);
+ }
+
+ return msk->subflow;
+}
+
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
@@ -459,7 +472,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
return false;
}
-static void mptcp_set_datafin_timeout(const struct sock *sk)
+static void mptcp_set_datafin_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 retransmits;
@@ -1662,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
static int mptcp_disconnect(struct sock *sk, int flags);
-static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, int *copied_syn)
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ struct sock *ssk;
int ret;
+ /* On flags-based fastopen, MPTCP is expected to create the
+ * first subflow right now. Otherwise we are in the defer_connect
+ * path, and the first subflow must already be present.
+ * Since the defer_connect flag is cleared after the first successful
+ * fastopen attempt, there is no need to check additional subflow status.
+ */
+ if (msg->msg_flags & MSG_FASTOPEN) {
+ ssock = __mptcp_nmpc_socket(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+ }
+ if (!msk->first)
+ return -EINVAL;
+
+ ssk = msk->first;
+
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
@@ -1691,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
} else if (ret && ret != -EINPROGRESS) {
mptcp_disconnect(sk, 0);
}
+ inet_sk(sk)->defer_connect = 0;
return ret;
}
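Both branches of mptcp_sendmsg_fastopen() correspond to standard user-space idioms; a sketch of the two paths (IPPROTO_MPTCP support assumed on the running kernel, error handling and close() elided):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP		262
#endif
#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT	30
#endif

/* MSG_FASTOPEN path: __mptcp_nmpc_socket() creates the first subflow
 * inside the sendto() call itself
 */
static ssize_t tfo_flags_send(const struct sockaddr_in *dst,
			      const void *buf, size_t len)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)dst, sizeof(*dst));
}

/* defer_connect path: connect() returns immediately and the queued
 * data rides on the SYN emitted by the first send()
 */
static ssize_t tfo_defer_send(const struct sockaddr_in *dst,
			      const void *buf, size_t len)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	int one = 1;

	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
	connect(fd, (const struct sockaddr *)dst, sizeof(*dst));
	return send(fd, buf, len, 0);
}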
@@ -1699,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
@@ -1709,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
- msg->msg_flags & MSG_FASTOPEN))) {
+ if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
- ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+ ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2315,7 +2344,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push, dispose_it;
+ bool dispose_it, need_push = false;
+
+ /* If the first subflow moved to a close state before accept, e.g. due
+ * to an incoming reset, mptcp either:
+ * - if either the subflow or the msk is dead, destroy the context
+ * (the subflow socket is deleted by inet_child_forget) and the msk
+ * - otherwise do nothing at the moment and take action at accept and/or
+ * listener shutdown - user-space must be able to accept() the closed
+ * socket.
+ */
+ if (msk->in_accept_queue && msk->first == ssk) {
+ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+ return;
+
+ /* ensure the later check in mptcp_worker() will dispose of the msk */
+ sock_set_flag(sk, SOCK_DEAD);
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_drop_ctx(ssk);
+ goto out_release;
+ }
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@@ -2351,28 +2399,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
- } else if (msk->in_accept_queue && msk->first == ssk) {
- /* if the first subflow moved to a close state, e.g. due to
- * incoming reset and we reach here before inet_child_forget()
- * the TCP stack could later try to close it via
- * inet_csk_listen_stop(), or deliver it to the user space via
- * accept().
- * We can't delete the subflow - or risk a double free - nor let
- * the msk survive - or will be leaked in the non accept scenario:
- * fallback and let TCP cope with the subflow cleanup.
- */
- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
- mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN)
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
+
+out_release:
release_sock(ssk);
sock_put(ssk);
@@ -2427,21 +2469,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
- /* if the MPC subflow has been closed before the msk is accepted,
- * msk will never be accept-ed, close it now
- */
- if (!msk->first && msk->in_accept_queue) {
- sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
- }
}
-static bool mptcp_check_close_timeout(const struct sock *sk)
+static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
- if (delta >= TCP_TIMEWAIT_LEN)
+ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@@ -2480,15 +2515,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
/* Mirror the tcp_reset() error propagation */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
+ WRITE_ONCE(sk->sk_err, ECONNREFUSED);
break;
case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
+ WRITE_ONCE(sk->sk_err, EPIPE);
break;
case TCP_CLOSE:
return;
default:
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -2626,7 +2661,7 @@ static void mptcp_worker(struct work_struct *work)
lock_sock(sk);
state = sk->sk_state;
- if (unlikely(state == TCP_CLOSE))
+ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
mptcp_check_data_fin_ack(sk);
@@ -2649,7 +2684,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_check_close_timeout(sk)) {
+ if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@@ -2728,10 +2763,6 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
- ret = __mptcp_socket_create(mptcp_sk(sk));
- if (ret)
- return ret;
-
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2895,6 +2926,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
+void __mptcp_unaccepted_force_close(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ __mptcp_destroy_sock(sk);
+}
+
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -2928,10 +2967,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
- if (mptcp_check_readable(msk)) {
- /* the msk has read data, do the MPTCP equivalent of TCP reset */
+ if (mptcp_check_readable(msk) || timeout < 0) {
+ /* If the msk has read data, or the caller explicitly asks for it,
+ * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+ */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
+ timeout = 0;
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}
@@ -3143,7 +3185,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = __mptcp_nmpc_socket(msk);
+ listener = msk->subflow;
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3363,7 +3405,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
- ssock = __mptcp_nmpc_socket(msk);
+ ssock = msk->subflow;
pr_debug("msk=%p, subflow=%p", msk, ssock);
if (WARN_ON_ONCE(!ssock))
return -EINVAL;
@@ -3551,8 +3593,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int err = -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
@@ -3640,8 +3682,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sock->sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3677,8 +3719,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3709,7 +3751,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- ssock = __mptcp_nmpc_socket(msk);
+ /* buggy applications can call accept on socket states other than LISTEN
+ * but no need to allocate the first subflow just to error out.
+ */
+ ssock = msk->subflow;
if (!ssock)
return -EINVAL;
@@ -3733,6 +3778,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
+
+ /* Do late cleanup for the first subflow as necessary. Also
+ * deal with bad peers not doing a complete shutdown.
+ */
+ if (msk->first &&
+ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_empty(&msk->conn_list)))
+ inet_sk_state_store(newsk, TCP_CLOSE);
+ }
+
release_sock(newsk);
}
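The user-visible effect, sketched below with a hypothetical listen_fd (error handling elided): accept() now hands out the socket even when the peer already reset it, and the failure is observed on the first read instead:

	/* illustrative only: the msk sat in the accept queue with its
	 * first subflow already moved to TCP_CLOSE by an incoming reset
	 */
	int cfd = accept(listen_fd, NULL, NULL);	/* succeeds now */
	char c;
	ssize_t n = read(cfd, &c, 1);	/* 0 (EOF) or -1 with ECONNRESET */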
@@ -3791,7 +3848,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;
return mask;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 339a6f072989..2d7b2c80a164 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -334,10 +334,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk)
sock_owned_by_me((const struct sock *)msk);
}
-static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
-{
- return (struct mptcp_sock *)sk;
-}
+#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
/* the msk socket doesn't use the backlog, also account for the bulk
* free memory
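mptcp_sk() now forwards constness through container_of_const(); for context, that generic helper (from include/linux/container_of.h, quoted here approximately, not part of this diff) is:

#define container_of_const(ptr, type, member)				\
	_Generic(ptr,							\
		const typeof(*(ptr)) *:					\
			((const type *)container_of(ptr, type, member)),\
		default:						\
			((type *)container_of(ptr, type, member)))

A const struct sock * thus yields a const struct mptcp_sock *, which is what allows the mptcp_pending_tail() hunk below to hold the msk in a const pointer without casting.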
@@ -371,7 +368,7 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
+ const struct mptcp_sock *msk = mptcp_sk(sk);
if (!msk->first_pending)
return NULL;
@@ -382,7 +379,7 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
-static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -629,10 +626,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
@@ -785,7 +784,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk,
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
@@ -833,8 +832,6 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8a9656248b0f..d4258869ac48 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTOIFINDEX:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
@@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
@@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
issk = inet_sk(ssock->sk);
@@ -762,13 +762,15 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *sock;
- int ret = -EINVAL;
+ int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
- if (!sock)
+ if (IS_ERR(sock)) {
+ ret = PTR_ERR(sock);
goto unlock;
+ }
ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
@@ -861,7 +863,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
- int ret = -EINVAL;
+ int ret;
struct sock *ssk;
lock_sock(sk);
@@ -872,8 +874,10 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
}
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
+ if (IS_ERR(ssock)) {
+ ret = PTR_ERR(ssock);
goto out;
+ }
ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
@@ -885,7 +889,6 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
u32 flags = 0;
- u8 val;
memset(info, 0, sizeof(*info));
@@ -893,12 +896,19 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
- info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
- val = mptcp_pm_get_add_addr_signal_max(msk);
- info->mptcpi_add_addr_signal_max = val;
- val = mptcp_pm_get_add_addr_accept_max(msk);
- info->mptcpi_add_addr_accepted_max = val;
- info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
+
+ /* The following limits only make sense for the in-kernel PM */
+ if (mptcp_pm_is_kernel(msk)) {
+ info->mptcpi_subflows_max =
+ mptcp_pm_get_subflows_max(msk);
+ info->mptcpi_add_addr_signal_max =
+ mptcp_pm_get_add_addr_signal_max(msk);
+ info->mptcpi_add_addr_accepted_max =
+ mptcp_pm_get_add_addr_accept_max(msk);
+ info->mptcpi_local_addr_max =
+ mptcp_pm_get_local_addr_max(msk);
+ }
+
if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
flags |= MPTCP_INFO_FLAG_FALLBACK;
if (READ_ONCE(msk->can_ack))
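These counters are what user space reads back through the MPTCP_INFO socket option; a minimal sketch (assuming <linux/mptcp.h> and a SOL_MPTCP-capable kernel, error handling trimmed):

#include <linux/mptcp.h>
#include <sys/socket.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP	284
#endif

static int query_subflows_max(int fd)
{
	struct mptcp_info info = { };
	socklen_t len = sizeof(info);

	if (getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &info, &len) < 0)
		return -1;

	/* stays 0 when the userspace PM owns the limits, per the hunk above */
	return info.mptcpi_subflows_max;
}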
@@ -1046,7 +1056,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
memset(a, 0, sizeof(*a));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a0041360ee9d..ba065b66551a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -408,9 +408,8 @@ void mptcp_subflow_reset(struct sock *ssk)
tcp_send_active_reset(ssk, GFP_ATOMIC);
tcp_done(ssk);
- if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
- schedule_work(&mptcp_sk(sk)->work))
- return; /* worker will put sk for us */
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}
@@ -696,14 +695,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
-static void mptcp_force_close(struct sock *sk)
-{
- /* the msk is not yet exposed to user-space, and refcount is 2 */
- inet_sk_state_store(sk, TCP_CLOSE);
- sk_common_release(sk);
- sock_put(sk);
-}
-
static void subflow_ulp_fallback(struct sock *sk,
struct mptcp_subflow_context *old_ctx)
{
@@ -724,9 +715,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
if (!ctx)
return;
- subflow_ulp_fallback(ssk, ctx);
- if (ctx->conn)
- sock_put(ctx->conn);
+ list_del(&mptcp_subflow_ctx(ssk)->node);
+ if (inet_csk(ssk)->icsk_ulp_ops) {
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+ }
kfree_rcu(ctx, rcu);
}
@@ -755,7 +749,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
- struct sock *new_msk = NULL;
struct mptcp_sock *owner;
struct sock *child;
@@ -784,14 +777,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
fallback = true;
- goto create_child;
- }
- new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
- if (!new_msk)
- fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
@@ -820,23 +808,23 @@ create_child:
subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
goto dispose_child;
}
-
- if (new_msk)
- mptcp_copy_inaddrs(new_msk, child);
- mptcp_subflow_drop_ctx(child);
- goto out;
+ goto fallback;
}
/* ssk inherits options of listener sk */
ctx->setsockopt_seq = listener->setsockopt_seq;
if (ctx->mp_capable) {
- owner = mptcp_sk(new_msk);
+ ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req);
+ if (!ctx->conn)
+ goto fallback;
+
+ owner = mptcp_sk(ctx->conn);
/* this can't race with mptcp_close(), as the msk is
* not yet exposed to user-space
*/
- inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+ inet_sk_state_store(ctx->conn, TCP_ESTABLISHED);
/* record the newly created socket as the first msk
* subflow, but don't link it yet into conn_list
@@ -846,11 +834,9 @@ create_child:
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
- mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
+ owner->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(owner, child, 1);
mptcp_token_accept(subflow_req, owner);
- ctx->conn = new_msk;
- new_msk = NULL;
/* set msk addresses early to ensure mptcp_pm_get_local_id()
* uses the correct data
@@ -867,7 +853,7 @@ create_child:
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
mptcp_subflow_fully_established(ctx, &mp_opt);
- mptcp_pm_fully_established(owner, child, GFP_ATOMIC);
+ mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
} else if (ctx->mp_join) {
@@ -900,11 +886,6 @@ create_child:
}
}
-out:
- /* dispose of the left over mptcp master, if any */
- if (unlikely(new_msk))
- mptcp_force_close(new_msk);
-
/* check for expected invariant - should never trigger, just help
* catch earlier subtle bugs
*/
@@ -922,6 +903,10 @@ dispose_child:
/* The last child reference will be released by the caller */
return child;
+
+fallback:
+ mptcp_subflow_drop_ctx(child);
+ return child;
}
static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
@@ -1118,8 +1103,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
} else {
- if (updated && schedule_work(&msk->work))
- sock_hold((struct sock *)msk);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
return MAPPING_DATA_FIN;
}
@@ -1222,17 +1207,12 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
- struct sock *sk = (struct sock *)msk;
-
if (likely(ssk->sk_state != TCP_CLOSE))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
- sock_hold(sk);
- if (!schedule_work(&msk->work))
- sock_put(sk);
- }
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ mptcp_schedule_work((struct sock *)msk);
}
static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
@@ -1350,7 +1330,7 @@ fallback:
subflow->reset_reason = MPTCP_RST_EMPTCP;
reset:
- ssk->sk_err = EBADMSG;
+ WRITE_ONCE(ssk->sk_err, EBADMSG);
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
@@ -1434,7 +1414,7 @@ void __mptcp_error_report(struct sock *sk)
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
- sk->sk_err = -err;
+ WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
smp_wmb();
@@ -1825,6 +1805,77 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+ struct sock *sk;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ sk = subflow->conn;
+ msk = mptcp_sk(sk);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ sock_hold(sk);
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+ * or it will cause an ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ sk = (struct sock *)msk;
+
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ next = msk->dl_next;
+ msk->dl_next = NULL;
+
+ __mptcp_unaccepted_force_close(sk);
+ release_sock(sk);
+
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
+ mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+ sock_put(sk);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);