Diffstat (limited to 'net')
-rw-r--r--   net/mptcp/ctrl.c        |  26
-rw-r--r--   net/mptcp/mib.c         |   2
-rw-r--r--   net/mptcp/mib.h         |   2
-rw-r--r--   net/mptcp/options.c     |   8
-rw-r--r--   net/mptcp/pm.c          |  21
-rw-r--r--   net/mptcp/pm_netlink.c  |  39
-rw-r--r--   net/mptcp/protocol.c    | 187
-rw-r--r--   net/mptcp/protocol.h    |  31
-rw-r--r--   net/mptcp/subflow.c     |   6
9 files changed, 265 insertions, 57 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 7d738bd06f2c..8b235468c88f 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -21,43 +21,50 @@ struct mptcp_pernet {
 	struct ctl_table_header *ctl_table_hdr;
 #endif
 
-	u8 mptcp_enabled;
 	unsigned int add_addr_timeout;
+	unsigned int stale_loss_cnt;
+	u8 mptcp_enabled;
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 };
 
-static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
+static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
 {
 	return net_generic(net, mptcp_pernet_id);
 }
 
-int mptcp_is_enabled(struct net *net)
+int mptcp_is_enabled(const struct net *net)
 {
 	return mptcp_get_pernet(net)->mptcp_enabled;
 }
 
-unsigned int mptcp_get_add_addr_timeout(struct net *net)
+unsigned int mptcp_get_add_addr_timeout(const struct net *net)
 {
 	return mptcp_get_pernet(net)->add_addr_timeout;
 }
 
-int mptcp_is_checksum_enabled(struct net *net)
+int mptcp_is_checksum_enabled(const struct net *net)
 {
 	return mptcp_get_pernet(net)->checksum_enabled;
 }
 
-int mptcp_allow_join_id0(struct net *net)
+int mptcp_allow_join_id0(const struct net *net)
 {
 	return mptcp_get_pernet(net)->allow_join_initial_addr_port;
 }
 
+unsigned int mptcp_stale_loss_cnt(const struct net *net)
+{
+	return mptcp_get_pernet(net)->stale_loss_cnt;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
 	pernet->mptcp_enabled = 1;
 	pernet->add_addr_timeout = TCP_RTO_MAX;
 	pernet->checksum_enabled = 0;
 	pernet->allow_join_initial_addr_port = 1;
+	pernet->stale_loss_cnt = 4;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = SYSCTL_ONE
 	},
+	{
+		.procname = "stale_loss_cnt",
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_douintvec_minmax,
+	},
 	{}
 };
 
@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 	table[1].data = &pernet->add_addr_timeout;
 	table[2].data = &pernet->checksum_enabled;
 	table[3].data = &pernet->allow_join_initial_addr_port;
+	table[4].data = &pernet->stale_loss_cnt;
 
 	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
 	if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index ff2cc0e3273d..3a7c4e7b2d79 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
 	SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
 	SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
+	SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
+	SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 0663cb12b448..8ec16c991aac 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -38,6 +38,8 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_MPPRIOTX,		/* Transmit a MP_PRIO */
 	MPTCP_MIB_MPPRIORX,		/* Received a MP_PRIO */
 	MPTCP_MIB_RCVPRUNED,		/* Incoming packet dropped due to memory limit */
+	MPTCP_MIB_SUBFLOWSTALE,		/* Subflows entered 'stale' status */
+	MPTCP_MIB_SUBFLOWRECOVER,	/* Subflows returned to active status after being stale */
 	__MPTCP_MIB_MAX
 };
 
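[Illustration only, not part of the patch] The new knob is a plain per-netns threshold defaulting to 4: a subflow only becomes a candidate for the 'stale' mark once it has gone through more than stale_loss_cnt retransmission periods without receiving data, and setting the sysctl to 0 disables the heuristic (see the check added to pm_netlink.c further down). A minimal, self-contained userspace model of that gate; all names below are invented:

#include <stdbool.h>
#include <stdio.h>

/* userspace model of the per-netns knob; only the field name and the
 * default value echo the kernel code above
 */
struct pernet_model {
	unsigned int stale_loss_cnt;
};

static void pernet_set_defaults(struct pernet_model *p)
{
	p->stale_loss_cnt = 4;		/* mirrors mptcp_pernet_set_defaults() */
}

/* candidate for the 'stale' mark: more than stale_loss_cnt stalled rtx
 * periods, and a threshold of 0 disables the heuristic
 */
static bool stale_candidate(const struct pernet_model *p, unsigned int stale_count)
{
	return p->stale_loss_cnt && stale_count > p->stale_loss_cnt;
}

int main(void)
{
	struct pernet_model p;

	pernet_set_defaults(&p);
	printf("4 stalled rtx: %d\n", stale_candidate(&p, 4));	/* 0: not yet */
	printf("5 stalled rtx: %d\n", stale_candidate(&p, 5));	/* 1: candidate */
	p.stale_loss_cnt = 0;					/* sysctl set to 0 */
	printf("disabled:      %d\n", stale_candidate(&p, 100));	/* 0 */
	return 0;
}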
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4452455aef7f..e37b6f2fb514 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
 	old_snd_una = msk->snd_una;
 	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 
-	/* ACK for data not even sent yet? Ignore. */
-	if (after64(new_snd_una, snd_nxt))
-		new_snd_una = old_snd_una;
+	/* ACK for data not even sent yet and even above recovery bound? Ignore.*/
+	if (unlikely(after64(new_snd_una, snd_nxt))) {
+		if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
+			new_snd_una = old_snd_una;
+	}
 
 	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 639271e09604..0ed3e565f8f8 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -10,6 +10,8 @@
 #include <net/mptcp.h>
 #include "protocol.h"
 
+#include "mib.h"
+
 /* path manager command handlers */
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -308,6 +310,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 	return mptcp_pm_nl_get_local_id(msk, skc);
 }
 
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);
+
+	/* keep track of rtx periods with no progress */
+	if (!subflow->stale_count) {
+		subflow->stale_rcv_tstamp = rcv_tstamp;
+		subflow->stale_count++;
+	} else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
+		if (subflow->stale_count < U8_MAX)
+			subflow->stale_count++;
+		mptcp_pm_nl_subflow_chk_stale(msk, ssk);
+	} else {
+		subflow->stale_count = 0;
+		mptcp_subflow_set_active(subflow);
+	}
+}
+
 void mptcp_pm_data_init(struct mptcp_sock *msk)
 {
 	msk->pm.add_addr_signaled = 0;
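[Illustration only, not part of the patch] mptcp_pm_subflow_chk_stale() above samples the subflow once per MPTCP-level retransmission: the first stalled rtx records the current rcv_tstamp, every further rtx that still sees the same timestamp (no incoming data in between) bumps stale_count, and any rx progress resets the counter. A compilable userspace model of that bookkeeping, with invented names:

#include <stdint.h>
#include <stdio.h>

/* userspace model of the rtx-stall bookkeeping; field names echo the
 * subflow context above, everything else is invented
 */
struct subflow_model {
	uint32_t stale_rcv_tstamp;	/* rcv_tstamp seen at the first stalled rtx */
	uint8_t  stale_count;		/* consecutive rtx events with no rx progress */
};

static void on_retransmit(struct subflow_model *sf, uint32_t rcv_tstamp)
{
	if (!sf->stale_count) {
		/* first rtx of a run: remember the current rx timestamp */
		sf->stale_rcv_tstamp = rcv_tstamp;
		sf->stale_count++;
	} else if (sf->stale_rcv_tstamp == rcv_tstamp) {
		/* still no incoming data since the run started */
		if (sf->stale_count < UINT8_MAX)
			sf->stale_count++;
		/* the kernel would call mptcp_pm_nl_subflow_chk_stale() here */
	} else {
		/* rx made progress: the subflow is healthy again */
		sf->stale_count = 0;
	}
}

int main(void)
{
	struct subflow_model sf = { 0 };

	on_retransmit(&sf, 100);	/* start of a stall run */
	on_retransmit(&sf, 100);	/* no rx progress */
	on_retransmit(&sf, 100);	/* no rx progress */
	printf("stale_count after 3 stalled rtx: %u\n", (unsigned)sf.stale_count);
	on_retransmit(&sf, 150);	/* peer finally sent something */
	printf("stale_count after rx progress:  %u\n", (unsigned)sf.stale_count);
	return 0;
}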
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56263c2c4014..ac0aa6faacfa 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -46,6 +46,7 @@ struct pm_nl_pernet {
 	spinlock_t		lock;
 	struct list_head	local_addr_list;
 	unsigned int		addrs;
+	unsigned int		stale_loss_cnt;
 	unsigned int		add_addr_signal_max;
 	unsigned int		add_addr_accept_max;
 	unsigned int		local_addr_max;
@@ -899,6 +900,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
 	[MPTCP_PM_ATTR_SUBFLOWS]	= { .type	= NLA_U32, },
 };
 
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+	struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
+	struct sock *sk = (struct sock *)msk;
+	unsigned int active_max_loss_cnt;
+	struct net *net = sock_net(sk);
+	unsigned int stale_loss_cnt;
+	bool slow;
+
+	stale_loss_cnt = mptcp_stale_loss_cnt(net);
+	if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
+		return;
+
+	/* look for another available subflow not in loss state */
+	active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
+	mptcp_for_each_subflow(msk, iter) {
+		if (iter != subflow && mptcp_subflow_active(iter) &&
+		    iter->stale_count < active_max_loss_cnt) {
+			/* we have some alternatives, try to mark this subflow as idle ...*/
+			slow = lock_sock_fast(ssk);
+			if (!tcp_rtx_and_write_queues_empty(ssk)) {
+				subflow->stale = 1;
+				__mptcp_retransmit_pending_data(sk);
+				MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+			}
+			unlock_sock_fast(ssk, slow);
+
+			/* always try to push the pending data regarless of re-injections:
+			 * we can possibly use backup subflows now, and subflow selection
+			 * is cheap under the msk socket lock
+			 */
+			__mptcp_push_pending(sk, 0);
+			return;
+		}
+	}
+}
+
 static int mptcp_pm_family_to_addr(int family)
 {
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1922,6 +1960,7 @@ static int __net_init pm_nl_init_net(struct net *net)
 	INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
 
 	pernet->next_id = 1;
+	pernet->stale_loss_cnt = 4;
 	spin_lock_init(&pernet->lock);
 
 	/* No need to initialize other pernet fields, the struct is zeroed at
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a88924947815..22214a58d892 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -411,16 +411,29 @@ static void mptcp_set_datafin_timeout(const struct sock *sk)
 		       TCP_RTO_MIN << icsk->icsk_retransmits);
 }
 
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+static void __mptcp_set_timeout(struct sock *sk, long tout)
 {
-	long tout = ssk && inet_csk(ssk)->icsk_pending ?
-				      inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
-	if (tout <= 0)
-		tout = mptcp_sk(sk)->timer_ival;
 	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
 }
 
+static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow)
+{
+	const struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+	return inet_csk(ssk)->icsk_pending && !subflow->stale_count ?
+	       inet_csk(ssk)->icsk_timeout - jiffies : 0;
+}
+
+static void mptcp_set_timeout(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	long tout = 0;
+
+	mptcp_for_each_subflow(mptcp_sk(sk), subflow)
+		tout = max(tout, mptcp_timeout_from_subflow(subflow));
+	__mptcp_set_timeout(sk, tout);
+}
+
 static bool tcp_can_send_ack(const struct sock *ssk)
 {
 	return !((1 << inet_sk_state_load(ssk)) &
@@ -531,7 +544,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
 		}
 
 		ret = true;
-		mptcp_set_timeout(sk, NULL);
 		mptcp_send_ack(msk);
 		mptcp_close_wake_up(sk);
 	}
@@ -791,10 +803,7 @@ static void mptcp_reset_timer(struct sock *sk)
 	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
 		return;
 
-	/* should never be called with mptcp level timer cleared */
-	tout = READ_ONCE(mptcp_sk(sk)->timer_ival);
-	if (WARN_ON_ONCE(!tout))
-		tout = TCP_RTO_MIN;
+	tout = mptcp_sk(sk)->timer_ival;
 	sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
 }
 
@@ -1046,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk)
 		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
 			break;
 
-		if (WARN_ON_ONCE(dfrag == msk->first_pending))
-			break;
+		if (unlikely(dfrag == msk->first_pending)) {
+			/* in recovery mode can see ack after the current snd head */
+			if (WARN_ON_ONCE(!msk->recovery))
+				break;
+
+			WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+		}
+
 		dfrag_clear(sk, dfrag);
 		cleaned = true;
 	}
@@ -1056,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk)
 	if (dfrag && after64(snd_una, dfrag->data_seq)) {
 		u64 delta = snd_una - dfrag->data_seq;
 
-		if (WARN_ON_ONCE(delta > dfrag->already_sent))
-			goto out;
+		/* prevent wrap around in recovery mode */
+		if (unlikely(delta > dfrag->already_sent)) {
+			if (WARN_ON_ONCE(!msk->recovery))
+				goto out;
+			if (WARN_ON_ONCE(delta > dfrag->data_len))
+				goto out;
+			dfrag->already_sent += delta - dfrag->already_sent;
+		}
 
 		dfrag->data_seq += delta;
 		dfrag->offset += delta;
@@ -1068,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk)
 		cleaned = true;
 	}
 
+	/* all retransmitted data acked, recovery completed */
+	if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt))
+		msk->recovery = false;
+
 out:
 	if (cleaned) {
 		if (tcp_under_memory_pressure(sk)) {
@@ -1076,8 +1101,8 @@ out:
 		}
 	}
 
-	if (snd_una == READ_ONCE(msk->snd_nxt)) {
-		if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
+	if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
+		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
 			mptcp_stop_timer(sk);
 	} else {
 		mptcp_reset_timer(sk);
 	}
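[Illustration only, not part of the patch] The recovery bookkeeping above works together with the ack_update_msk() change in options.c: while re-injected data is being resent, snd_nxt is rewound, so a DATA_ACK may legitimately sit above snd_nxt as long as it does not exceed the pre-recovery level saved in recovery_snd_nxt, and recovery ends once snd_una moves past that level. A small userspace model of both checks, names invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* 64-bit "after" comparison, same idea as the kernel's after64() */
static bool after64(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)(seq1 - seq2) > 0;
}

struct msk_model {
	uint64_t snd_una;
	uint64_t snd_nxt;		/* rewound to the rtx head during recovery */
	uint64_t recovery_snd_nxt;	/* send level reached before re-injection */
	bool recovery;
};

/* mirrors the ack_update_msk() change: acks above snd_nxt are accepted only
 * while in recovery and only up to recovery_snd_nxt
 */
static uint64_t accept_data_ack(const struct msk_model *m, uint64_t ack)
{
	if (after64(ack, m->snd_nxt) &&
	    (!m->recovery || after64(ack, m->recovery_snd_nxt)))
		return m->snd_una;	/* bogus ack: keep the old snd_una */
	return ack;
}

int main(void)
{
	struct msk_model m = {
		.snd_una = 1000, .snd_nxt = 1200,
		.recovery_snd_nxt = 5000, .recovery = true,
	};

	printf("ack 3000 -> snd_una %llu\n",
	       (unsigned long long)accept_data_ack(&m, 3000));	/* accepted */
	printf("ack 6000 -> snd_una %llu\n",
	       (unsigned long long)accept_data_ack(&m, 6000));	/* ignored */

	/* recovery completes when snd_una passes recovery_snd_nxt,
	 * as in __mptcp_clean_una() above
	 */
	m.snd_una = 5001;
	if (m.recovery && after64(m.snd_una, m.recovery_snd_nxt))
		m.recovery = false;
	printf("still in recovery: %d\n", m.recovery);
	return 0;
}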
@@ -1366,16 +1391,44 @@ struct subflow_send_info {
 	u64 ratio;
 };
 
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow)
+{
+	if (!subflow->stale)
+		return;
+
+	subflow->stale = 0;
+	MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER);
+}
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+	if (unlikely(subflow->stale)) {
+		u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp);
+
+		if (subflow->stale_rcv_tstamp == rcv_tstamp)
+			return false;
+
+		mptcp_subflow_set_active(subflow);
+	}
+	return __mptcp_subflow_active(subflow);
+}
+
+/* implement the mptcp packet scheduler;
+ * returns the subflow that will transmit the next DSS
+ * additionally updates the rtx timeout
+ */
 static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
 	struct subflow_send_info send_info[2];
 	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
 	int i, nr_active = 0;
 	struct sock *ssk;
+	long tout = 0;
 	u64 ratio;
 	u32 pace;
 
-	sock_owned_by_me((struct sock *)msk);
+	sock_owned_by_me(sk);
 
 	if (__mptcp_check_fallback(msk)) {
 		if (!msk->first)
@@ -1386,8 +1439,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 	/* re-use last subflow, if the burst allow that */
 	if (msk->last_snd && msk->snd_burst > 0 &&
 	    sk_stream_memory_free(msk->last_snd) &&
-	    mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
+	    mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
+		mptcp_set_timeout(sk);
 		return msk->last_snd;
+	}
 
 	/* pick the subflow with the lower wmem/wspace ratio */
 	for (i = 0; i < 2; ++i) {
@@ -1400,6 +1455,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 		if (!mptcp_subflow_active(subflow))
 			continue;
 
+		tout = max(tout, mptcp_timeout_from_subflow(subflow));
 		nr_active += !subflow->backup;
 		if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
 			continue;
@@ -1415,6 +1471,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 			send_info[subflow->backup].ratio = ratio;
 		}
 	}
+	__mptcp_set_timeout(sk, tout);
 
 	/* pick the best backup if no other subflow is active */
 	if (!nr_active)
@@ -1433,12 +1490,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 static void mptcp_push_release(struct sock *sk, struct sock *ssk,
 			       struct mptcp_sendmsg_info *info)
 {
-	mptcp_set_timeout(sk, ssk);
 	tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle,
 		 info->size_goal);
 	release_sock(ssk);
 }
 
-static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
 	struct sock *prev_ssk = NULL, *ssk = NULL;
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1501,12 +1557,11 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	mptcp_push_release(sk, ssk, &info);
 
 out:
-	if (copied) {
-		/* start the timer, if it's not pending */
-		if (!mptcp_timer_pending(sk))
-			mptcp_reset_timer(sk);
+	/* ensure the rtx timer is running */
+	if (!mptcp_timer_pending(sk))
+		mptcp_reset_timer(sk);
+	if (copied)
 		__mptcp_check_send_data_fin(sk);
-	}
 }
 
 static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
@@ -1567,7 +1622,6 @@ out:
 	 */
 	__mptcp_update_wmem(sk);
 	if (copied) {
-		mptcp_set_timeout(sk, ssk);
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
 		if (!mptcp_timer_pending(sk))
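[Illustration only, not part of the patch] The packet scheduler now filters stale subflows: a subflow flagged stale is skipped for transmission until its rcv_tstamp moves again, at which point mptcp_subflow_set_active() clears the flag and the SubflowRecover counter is bumped. A compact userspace model of that check, names invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct subflow_model {
	uint32_t stale_rcv_tstamp;	/* rx timestamp recorded when the stall began */
	uint32_t rcv_tstamp;		/* current rx timestamp */
	bool stale;
};

static bool subflow_usable(struct subflow_model *sf)
{
	if (sf->stale) {
		if (sf->stale_rcv_tstamp == sf->rcv_tstamp)
			return false;	/* still no rx progress: keep it idle */
		sf->stale = false;	/* mirrors mptcp_subflow_set_active() */
		/* the kernel would count MPTCP_MIB_SUBFLOWRECOVER here */
	}
	return true;			/* plus the usual TCP state checks */
}

int main(void)
{
	struct subflow_model sf = {
		.stale_rcv_tstamp = 100, .rcv_tstamp = 100, .stale = true,
	};

	printf("usable while stalled:     %d\n", subflow_usable(&sf));	/* 0 */
	sf.rcv_tstamp = 180;						/* peer acked data */
	printf("usable after rx progress: %d\n", subflow_usable(&sf));	/* 1 */
	printf("still flagged stale:      %d\n", sf.stale);		/* 0 */
	return 0;
}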
@@ -2083,10 +2137,11 @@ static void mptcp_timeout_timer(struct timer_list *t)
  *
  * A backup subflow is returned only if that is the only kind available.
  */
-static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
+static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
 {
+	struct sock *backup = NULL, *pick = NULL;
 	struct mptcp_subflow_context *subflow;
-	struct sock *backup = NULL;
+	int min_stale_count = INT_MAX;
 
 	sock_owned_by_me((const struct sock *)msk);
 
@@ -2096,14 +2151,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
-		if (!mptcp_subflow_active(subflow))
+		if (!__mptcp_subflow_active(subflow))
 			continue;
 
-		/* still data outstanding at TCP level? Don't retransmit. */
-		if (!tcp_write_queue_empty(ssk)) {
-			if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss)
-				continue;
-			return NULL;
+		/* still data outstanding at TCP level? skip this */
+		if (!tcp_rtx_and_write_queues_empty(ssk)) {
+			mptcp_pm_subflow_chk_stale(msk, ssk);
+			min_stale_count = min_t(int, min_stale_count, subflow->stale_count);
+			continue;
 		}
 
 		if (subflow->backup) {
@@ -2112,10 +2167,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 			continue;
 		}
 
-		return ssk;
+		if (!pick)
+			pick = ssk;
 	}
 
-	return backup;
+	if (pick)
+		return pick;
+
+	/* use backup only if there are no progresses anywhere */
+	return min_stale_count > 1 ? backup : NULL;
 }
 
 static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
 {
@@ -2126,6 +2186,50 @@
 	}
 }
 
+bool __mptcp_retransmit_pending_data(struct sock *sk)
+{
+	struct mptcp_data_frag *cur, *rtx_head;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (__mptcp_check_fallback(mptcp_sk(sk)))
+		return false;
+
+	if (tcp_rtx_and_write_queues_empty(sk))
+		return false;
+
+	/* the closing socket has some data untransmitted and/or unacked:
+	 * some data in the mptcp rtx queue has not really xmitted yet.
+	 * keep it simple and re-inject the whole mptcp level rtx queue
+	 */
+	mptcp_data_lock(sk);
+	__mptcp_clean_una_wakeup(sk);
+	rtx_head = mptcp_rtx_head(sk);
+	if (!rtx_head) {
+		mptcp_data_unlock(sk);
+		return false;
+	}
+
+	/* will accept ack for reijected data before re-sending them */
+	if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
+		msk->recovery_snd_nxt = msk->snd_nxt;
+	msk->recovery = true;
+	mptcp_data_unlock(sk);
+
+	msk->first_pending = rtx_head;
+	msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
+	msk->snd_nxt = rtx_head->data_seq;
+	msk->snd_burst = 0;
+
+	/* be sure to clear the "sent status" on all re-injected fragments */
+	list_for_each_entry(cur, &msk->rtx_queue, list) {
+		if (!cur->already_sent)
+			break;
+		cur->already_sent = 0;
+	}
+
+	return true;
+}
+
 /* subflow sockets can be either outgoing (connect) or incoming
  * (accept).
  *
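[Illustration only, not part of the patch] __mptcp_retransmit_pending_data() above hands the whole MPTCP-level rtx queue back to the scheduler: it saves the send level reached so far in recovery_snd_nxt, rewinds snd_nxt to the rtx head and clears the already-sent marks so the data can be re-transmitted on other subflows. A toy userspace model using a plain array instead of the kernel list, names invented:

#include <stdint.h>
#include <stdio.h>

struct dfrag_model {
	uint64_t data_seq;
	uint32_t data_len;
	uint32_t already_sent;
};

struct msk_model {
	uint64_t snd_nxt;
	uint64_t recovery_snd_nxt;
	int recovery;
};

static void reinject_rtx_queue(struct msk_model *m, struct dfrag_model *q, int nr)
{
	int i;

	if (!nr)
		return;
	/* remember how far we had sent before rewinding; the kernel only
	 * updates this when not already in recovery, or when snd_nxt moved
	 * past the previous bound
	 */
	m->recovery_snd_nxt = m->snd_nxt;
	m->recovery = 1;
	m->snd_nxt = q[0].data_seq;	/* the rtx head becomes the next tx point */

	/* clear the "sent status" on every fragment that had been transmitted */
	for (i = 0; i < nr; i++) {
		if (!q[i].already_sent)
			break;		/* the remainder was never sent */
		q[i].already_sent = 0;
	}
}

int main(void)
{
	struct dfrag_model q[] = {
		{ .data_seq = 1000, .data_len = 500, .already_sent = 500 },
		{ .data_seq = 1500, .data_len = 500, .already_sent = 200 },
		{ .data_seq = 2000, .data_len = 500, .already_sent = 0 },
	};
	struct msk_model m = { .snd_nxt = 1700 };

	reinject_rtx_queue(&m, q, 3);
	printf("snd_nxt rewound to %llu, recovery_snd_nxt %llu\n",
	       (unsigned long long)m.snd_nxt,
	       (unsigned long long)m.recovery_snd_nxt);
	return 0;
}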
@@ -2138,6 +2242,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 			      struct mptcp_subflow_context *subflow)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	bool need_push;
 
 	list_del(&subflow->node);
 
@@ -2149,6 +2254,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	if (ssk->sk_socket)
 		sock_orphan(ssk);
 
+	need_push = __mptcp_retransmit_pending_data(sk);
 	subflow->disposable = 1;
 
 	/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2176,6 +2282,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
 	if (msk->subflow && ssk == msk->subflow->sk)
 		mptcp_dispose_initial_subflow(msk);
+
+	if (need_push)
+		__mptcp_push_pending(sk, 0);
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2313,7 +2422,6 @@ static void __mptcp_retrans(struct sock *sk)
 				 info.size_goal);
 		}
 
-		mptcp_set_timeout(sk, ssk);
 		release_sock(ssk);
 
 reset_timer:
@@ -2384,10 +2492,12 @@ static int __mptcp_init_sock(struct sock *sk)
 	msk->wmem_reserved = 0;
 	WRITE_ONCE(msk->rmem_released, 0);
 	msk->tx_pending_data = 0;
+	msk->timer_ival = TCP_RTO_MIN;
 	msk->first = NULL;
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+	msk->recovery = false;
 
 	mptcp_pm_data_init(msk);
 
@@ -2472,7 +2582,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 		tcp_shutdown(ssk, how);
 	} else {
 		pr_debug("Sending DATA_FIN on subflow %p", ssk);
-		mptcp_set_timeout(sk, ssk);
 		tcp_send_ack(ssk);
 		if (!mptcp_timer_pending(sk))
 			mptcp_reset_timer(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0f0c026c5f8b..8bdd038def38 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -230,12 +230,17 @@ struct mptcp_sock {
 	struct sock	*last_snd;
 	int		snd_burst;
 	int		old_wspace;
+	u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
+						 * recovery related fields are under data_lock
+						 * protection
+						 */
 	u64		snd_una;
 	u64		wnd_end;
 	unsigned long	timer_ival;
 	u32		token;
 	int		rmem_released;
 	unsigned long	flags;
+	bool		recovery;		/* closing subflow write queue reinjected */
 	bool		can_ack;
 	bool		fully_established;
 	bool		rcv_data_fin;
@@ -427,7 +432,8 @@ struct mptcp_subflow_context {
 		send_mp_prio : 1,
 		rx_eof : 1,
 		can_ack : 1,        /* only after processing the remote a key */
-		disposable : 1;	    /* ctx can be free at ulp release time */
+		disposable : 1,	    /* ctx can be free at ulp release time */
+		stale : 1;	    /* unable to snd/rcv data, do not use for xmit */
 	enum mptcp_data_avail data_avail;
 	u32	remote_nonce;
 	u64	thmac;
@@ -439,11 +445,13 @@ struct mptcp_subflow_context {
 	u8	reset_seen:1;
 	u8	reset_transient:1;
 	u8	reset_reason:4;
+	u8	stale_count;
 
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
 
-	u32 setsockopt_seq;
+	u32	setsockopt_seq;
+	u32	stale_rcv_tstamp;
 
 	struct	sock *tcp_sock;	    /* tcp sk backpointer */
 	struct	sock *conn;	    /* parent mptcp_sock */
@@ -549,12 +557,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
 	clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
 }
 
-int mptcp_is_enabled(struct net *net);
-unsigned int mptcp_get_add_addr_timeout(struct net *net);
-int mptcp_is_checksum_enabled(struct net *net);
-int mptcp_allow_join_id0(struct net *net);
+int mptcp_is_enabled(const struct net *net);
+unsigned int mptcp_get_add_addr_timeout(const struct net *net);
+int mptcp_is_checksum_enabled(const struct net *net);
+int mptcp_allow_join_id0(const struct net *net);
+unsigned int mptcp_stale_loss_cnt(const struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
+bool __mptcp_retransmit_pending_data(struct sock *sk);
+void __mptcp_push_pending(struct sock *sk, unsigned int flags);
 bool mptcp_subflow_data_available(struct sock *sk);
 void __init mptcp_subflow_init(void);
 void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
@@ -573,7 +584,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 			 struct sockaddr_storage *addr,
 			 unsigned short family);
 
-static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
 	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
@@ -585,6 +596,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 	return ((1 << ssk->sk_state) &
 		(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
 }
 
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+
 static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 					      struct mptcp_subflow_context *ctx)
 {
@@ -690,6 +705,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
 
 void __init mptcp_pm_init(void);
 void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
 void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 966f777d35ce..1151926d335b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -435,10 +435,12 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 			goto do_reset;
 		}
 
+		subflow->backup = mp_opt.backup;
 		subflow->thmac = mp_opt.thmac;
 		subflow->remote_nonce = mp_opt.nonce;
-		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
-			 subflow->thmac, subflow->remote_nonce);
+		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+			 subflow, subflow->thmac, subflow->remote_nonce,
+			 subflow->backup);
 
 		if (!subflow_thmac_valid(subflow)) {
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
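[Illustration only, not part of the patch] Putting the retransmission policy of mptcp_subflow_get_retrans() above in one place: prefer any active subflow whose TCP-level queues are empty, and fall back to a backup subflow only when every busy subflow has been stalling for more than one rtx period. A self-contained userspace model, names invented:

#include <limits.h>
#include <stdio.h>

struct subflow_model {
	int backup;		/* MP_JOIN backup flag, now taken from the SYNACK */
	int queues_empty;	/* models tcp_rtx_and_write_queues_empty() */
	int stale_count;
};

static int pick_retrans(const struct subflow_model *sf, int nr)
{
	int min_stale_count = INT_MAX;
	int backup = -1, pick = -1;
	int i;

	for (i = 0; i < nr; i++) {
		if (!sf[i].queues_empty) {
			/* data still outstanding at TCP level: skip it, but
			 * track how stalled the busy subflows are
			 */
			if (sf[i].stale_count < min_stale_count)
				min_stale_count = sf[i].stale_count;
			continue;
		}
		if (sf[i].backup) {
			if (backup < 0)
				backup = i;
		} else if (pick < 0) {
			pick = i;
		}
	}

	if (pick >= 0)
		return pick;
	/* use the backup only if nothing else is making progress */
	return min_stale_count > 1 ? backup : -1;
}

int main(void)
{
	struct subflow_model sf[] = {
		{ .backup = 0, .queues_empty = 0, .stale_count = 3 },
		{ .backup = 1, .queues_empty = 1, .stale_count = 0 },
	};

	printf("picked subflow: %d\n", pick_retrans(sf, 2));	/* 1: the backup */
	sf[0].stale_count = 1;					/* still progressing */
	printf("picked subflow: %d\n", pick_retrans(sf, 2));	/* -1: keep waiting */
	return 0;
}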
