Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  110
1 file changed, 53 insertions(+), 57 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a35018e2d0ba..8ec92dec321a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -664,10 +664,12 @@ EXPORT_IPV6_MOD(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt_us;
- long m = sample;
+ u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us;
+ long m = sample << 3;
- if (new_sample != 0) {
+ if (old_sample == 0 || m < old_sample) {
+ new_sample = m;
+ } else {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
* with chatty applications or bulk transfer apps which
@@ -678,17 +680,12 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
* else with timestamps disabled convergence takes too
* long.
*/
- if (!win_dep) {
- m -= (new_sample >> 3);
- new_sample += m;
- } else {
- m <<= 3;
- if (m < new_sample)
- new_sample = m;
- }
- } else {
- /* No previous measure. */
- new_sample = m << 3;
+ if (win_dep)
+ return;
+ /* Do not use this sample if receive queue is not empty. */
+ if (tp->rcv_nxt != tp->copied_seq)
+ return;
+ new_sample = old_sample - (old_sample >> 3) + sample;
}
tp->rcv_rtt_est.rtt_us = new_sample;
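The receiver RTT estimator above keeps its state as 8 * rtt_us: it snaps straight down to any lower sample and otherwise creeps upward through a 7/8 EWMA. A minimal userspace sketch of just that filter (the function name rcv_rtt_update and the sample values are invented here; the win_dep early return and the non-empty receive queue guard are left out):

#include <stdint.h>
#include <stdio.h>

static uint32_t rcv_rtt_update(uint32_t est, uint32_t sample_us)
{
	uint32_t m = sample_us << 3;          /* scale the sample to the 8x fixed point of the estimate */

	if (est == 0 || m < est)              /* first sample, or a new minimum: take it as-is */
		return m;
	return est - (est >> 3) + sample_us;  /* otherwise creep upward: keep 7/8, add the 1/8-scaled sample */
}

int main(void)
{
	uint32_t est = 0;
	uint32_t samples[] = { 40000, 35000, 50000, 50000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		est = rcv_rtt_update(est, samples[i]);
		printf("sample %u us -> estimate %u us\n", samples[i], est >> 3);
	}
	return 0;
}

With these samples the estimate drops to 35 ms immediately but needs several rounds to climb back toward 50 ms, which is the intended asymmetry.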
@@ -712,7 +709,7 @@ new_measure:
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
{
u32 delta, delta_us;
@@ -722,7 +719,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
if (!delta)
- delta = 1;
+ delta = min_delta;
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
return delta_us;
}
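tcp_rtt_tsopt_us() now rounds a zero timestamp delta up to a caller-chosen min_delta instead of a hard-coded 1, so the receive path (which passes 0 and only accepts strictly positive deltas) can discard samples shorter than one timestamp tick, while the ACK path (which passes 1) keeps the old rounding. A hedged userspace sketch with invented names and a 1 kHz timestamp clock assumed:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000
#define TCP_TS_HZ	1000	/* timestamp clock assumed to tick at 1 kHz */

static int32_t rtt_tsopt_us(uint32_t now_ts, uint32_t echoed_ts, uint32_t min_delta)
{
	uint32_t delta = now_ts - echoed_ts;

	if (delta < INT32_MAX / (USEC_PER_SEC / TCP_TS_HZ)) {
		if (!delta)
			delta = min_delta;	/* 0 lets the caller reject sub-tick samples */
		return delta * (USEC_PER_SEC / TCP_TS_HZ);
	}
	return -1;
}

int main(void)
{
	/* Echo arrives within the same timestamp tick. */
	printf("rcv path (min_delta=0): %d us\n", rtt_tsopt_us(1000, 1000, 0));
	printf("ack path (min_delta=1): %d us\n", rtt_tsopt_us(1000, 1000, 1));
	return 0;
}

On the same-tick echo the first call returns 0 us, which its caller drops, while the second still reports one full tick.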
@@ -740,13 +737,39 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
- s32 delta = tcp_rtt_tsopt_us(tp);
+ s32 delta = tcp_rtt_tsopt_us(tp, 0);
- if (delta >= 0)
+ if (delta > 0)
tcp_rcv_rtt_update(tp, delta, 0);
}
}
+static void tcp_rcvbuf_grow(struct sock *sk)
+{
+ const struct net *net = sock_net(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int rcvwin, rcvbuf, cap;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
+ (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+ return;
+
+ /* slow start: allow the sender to double its rate. */
+ rcvwin = tp->rcvq_space.space << 1;
+
+ if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
+ rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
+
+ cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+ rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap);
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+ /* Make the window clamp follow along. */
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
+ }
+}
/*
* This function should be called every time data is copied to user space.
* It calculates the appropriate TCP receive buffer space.
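tcp_rcvbuf_grow() sizes the buffer from the space consumed in the last RTT: double it so a slow-starting sender can double its rate, add whatever is parked in the out-of-order queue, convert the window to buffer bytes, clamp at tcp_rmem[2], and grow sk_rcvbuf (and window_clamp) only upward. A rough userspace model of that rule, where rcvbuf_target is an invented name and a flat 2x factor stands in for tcp_space_from_win():

#include <stdio.h>

static int rcvbuf_target(int rcvq_space, int ooo_bytes, int cur_rcvbuf, int cap)
{
	int rcvwin = (rcvq_space << 1) + ooo_bytes;	/* let the sender double its rate */
	int rcvbuf = rcvwin * 2;			/* crude stand-in for tcp_space_from_win() */

	if (rcvbuf > cap)
		rcvbuf = cap;				/* tcp_rmem[2]-style ceiling */
	return rcvbuf > cur_rcvbuf ? rcvbuf : cur_rcvbuf;	/* only ever grows */
}

int main(void)
{
	/* 256 KB consumed last RTT, 64 KB out of order, 512 KB buffer, 6 MB cap */
	printf("new rcvbuf: %d\n",
	       rcvbuf_target(256 * 1024, 64 * 1024, 512 * 1024, 6 * 1024 * 1024));
	return 0;
}

With 256 KB consumed last RTT and 64 KB out of order, the target comes out well above the current 512 KB buffer, so it would grow.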
@@ -754,8 +777,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 copied;
- int time;
+ int time, inq, copied;
trace_tcp_rcv_space_adjust(sk);
@@ -766,45 +788,18 @@ void tcp_rcv_space_adjust(struct sock *sk)
/* Number of bytes copied to user in last RTT */
copied = tp->copied_seq - tp->rcvq_space.seq;
+ /* Number of bytes in receive queue. */
+ inq = tp->rcv_nxt - tp->copied_seq;
+ copied -= inq;
if (copied <= tp->rcvq_space.space)
goto new_measure;
- /* A bit of theory :
- * copied = bytes received in previous RTT, our base window
- * To cope with packet losses, we need a 2x factor
- * To cope with slow start, and sender growing its cwin by 100 %
- * every RTT, we need a 4x factor, because the ACK we are sending
- * now is for the next RTT, not the current one :
- * <prev RTT . ><current RTT .. ><next RTT .... >
- */
-
- if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- u64 rcvwin, grow;
- int rcvbuf;
-
- /* minimal window to cope with packet losses, assuming
- * steady state. Add some cushion because of small variations.
- */
- rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
+ trace_tcp_rcvbuf_grow(sk, time);
- /* Accommodate for sender rate increase (eg. slow start) */
- grow = rcvwin * (copied - tp->rcvq_space.space);
- do_div(grow, tp->rcvq_space.space);
- rcvwin += (grow << 1);
-
- rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- if (rcvbuf > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
-
- /* Make the window clamp follow along. */
- WRITE_ONCE(tp->window_clamp,
- tcp_win_from_space(sk, rcvbuf));
- }
- }
tp->rcvq_space.space = copied;
+ tcp_rcvbuf_grow(sk);
+
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
tp->rcvq_space.time = tp->tcp_mstamp;
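The rewritten tcp_rcv_space_adjust() charges the last RTT only with bytes actually drained to user space: the in-queue residue (rcv_nxt - copied_seq) is subtracted first, and rcvq_space.space plus tcp_rcvbuf_grow() only advance when that net figure exceeds the previous measurement. A small sketch of just that accounting, with invented struct and function names:

#include <stdio.h>

struct rcvq_space_model {
	int space;	/* bytes consumed in the previous measurement RTT */
};

static int space_adjust(struct rcvq_space_model *rs,
			int copied_seq_delta, int inq)
{
	int copied = copied_seq_delta - inq;	/* discount what is still queued */

	if (copied <= rs->space)
		return 0;			/* no growth this round */
	rs->space = copied;			/* the kernel would also call tcp_rcvbuf_grow() here */
	return 1;
}

int main(void)
{
	struct rcvq_space_model rs = { .space = 128 * 1024 };

	/* 200 KB advanced past the last mark, but 96 KB still unread: no growth */
	printf("grew: %d\n", space_adjust(&rs, 200 * 1024, 96 * 1024));
	/* 300 KB advanced, only 16 KB unread: grows to 284 KB */
	printf("grew: %d (space now %d)\n",
	       space_adjust(&rs, 300 * 1024, 16 * 1024), rs.space);
	return 0;
}

A reader that leaves 96 KB sitting unread gets no credit for it, so the buffer no longer grows just because data piles up in the receive queue.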
@@ -3226,7 +3221,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+ seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -5173,6 +5168,7 @@ end:
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
+ tcp_rcvbuf_grow(sk);
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
@@ -6873,6 +6869,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
+ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
if (req) {
tcp_rcv_synrecv_state_fastopen(sk);
} else {
@@ -6898,9 +6897,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- if (tp->rx_opt.tstamp_ok)
- tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
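The advmss reduction for the timestamp option is now applied before the req/fastopen branching, so later setup in this function already sees the smaller value. The arithmetic itself is small; worked numbers, assuming a typical 1460-byte Ethernet-derived MSS:

#include <stdio.h>

#define TCPOLEN_TSTAMP_ALIGNED	12	/* 10-byte timestamp option, padded to 12 */

int main(void)
{
	int advmss = 1460;	/* assumed MSS for illustration */
	int tstamp_ok = 1;	/* timestamps negotiated on this connection */

	if (tstamp_ok)
		advmss -= TCPOLEN_TSTAMP_ALIGNED;
	printf("effective advmss: %d\n", advmss);	/* 1448 */
	return 0;
}

Every segment on a timestamp-enabled connection carries the 12-byte aligned option, so the per-segment payload budget drops from 1460 to 1448 bytes.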