summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/skbuff.h19
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/tcp.h15
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ipv4/tcp.c28
-rw-r--r--net/ipv4/tcp_output.c9
7 files changed, 64 insertions, 20 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bd6520329f6..10869906cc57 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -454,9 +454,15 @@ enum {
* all frags to avoid possible bad checksum
*/
SKBFL_SHARED_FRAG = BIT(1),
+
+ /* segment contains only zerocopy data and should not be
+ * charged to the kernel memory.
+ */
+ SKBFL_PURE_ZEROCOPY = BIT(2),
};
#define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
+#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
/*
* The callback notifies userspace to release buffers when skb DMA is done in
@@ -1464,6 +1470,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
return is_zcopy ? skb_uarg(skb) : NULL;
}
+static inline bool skb_zcopy_pure(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
+}
+
+static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
+ const struct sk_buff *skb2)
+{
+ return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
+}
+
static inline void net_zcopy_get(struct ubuf_info *uarg)
{
refcount_inc(&uarg->refcnt);
@@ -1528,7 +1545,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
if (!skb_zcopy_is_nouarg(skb))
uarg->callback(skb, uarg, zerocopy_success);
- skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG;
+ skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
}
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 620de053002d..b32906e1ab55 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1603,13 +1603,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
__sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
}
-static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
-{
- sk_wmem_queued_add(sk, -skb->truesize);
- sk_mem_uncharge(sk, skb->truesize);
- __kfree_skb(skb);
-}
-
static inline void sock_release_ownership(struct sock *sk)
{
if (sk->sk_lock.owned) {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8e8c5922a7b0..af91f370432e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -290,6 +290,16 @@ static inline bool tcp_out_of_memory(struct sock *sk)
return false;
}
+static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
+{
+ sk_wmem_queued_add(sk, -skb->truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_uncharge(sk, skb->truesize);
+ else
+ sk_mem_uncharge(sk, SKB_TRUESIZE(MAX_TCP_HEADER));
+ __kfree_skb(skb);
+}
+
void sk_forced_mem_schedule(struct sock *sk, int size);
bool tcp_check_oom(struct sock *sk, int shift);
@@ -967,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
const struct sk_buff *from)
{
return likely(tcp_skb_can_collapse_to(to) &&
- mptcp_skb_can_collapse(to, from));
+ mptcp_skb_can_collapse(to, from) &&
+ skb_pure_zcopy_same(to, from));
}
/* Events passed to congestion control interface */
@@ -1875,7 +1886,7 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc
{
list_del(&skb->tcp_tsorted_anchor);
tcp_rtx_queue_unlink(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
static inline void tcp_push_pending_frames(struct sock *sk)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..ee290776c661 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
- sk_mem_charge(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 67a9188d8a49..29e617d8d7fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3433,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
+ const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
- skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+ skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a7b1138d619c..2561c14a6e63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -863,6 +863,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
if (likely(skb)) {
bool mem_scheduled;
+ skb->truesize = SKB_TRUESIZE(size + MAX_TCP_HEADER);
if (force_schedule) {
mem_scheduled = true;
sk_forced_mem_schedule(sk, skb->truesize);
@@ -932,7 +933,7 @@ void tcp_remove_empty_skb(struct sock *sk)
tcp_unlink_write_queue(skb, sk);
if (tcp_write_queue_empty(sk))
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
}
@@ -1319,6 +1320,15 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
+ /* skb changing from pure zc to mixed, must charge zc */
+ if (unlikely(skb_zcopy_pure(skb))) {
+ if (!sk_wmem_schedule(sk, skb->data_len))
+ goto wait_for_space;
+
+ sk_mem_charge(sk, skb->data_len);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+
if (!sk_wmem_schedule(sk, copy))
goto wait_for_space;
@@ -1339,8 +1349,16 @@ new_segment:
}
pfrag->offset += copy;
} else {
- if (!sk_wmem_schedule(sk, copy))
- goto wait_for_space;
+ /* First append to a fragless skb builds initial
+ * pure zerocopy skb
+ */
+ if (!skb->len)
+ skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
+
+ if (!skb_zcopy_pure(skb)) {
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_space;
+ }
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
if (err == -EMSGSIZE || err == -EEXIST) {
@@ -2893,7 +2911,7 @@ static void tcp_rtx_queue_purge(struct sock *sk)
* list_del(&skb->tcp_tsorted_anchor)
*/
tcp_rtx_queue_unlink(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
}
@@ -2904,7 +2922,7 @@ void tcp_write_queue_purge(struct sock *sk)
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
tcp_skb_tsorted_anchor_cleanup(skb);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
}
tcp_rtx_queue_purge(sk);
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6867e5db3e35..287b57aadc37 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1677,7 +1677,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
if (delta_truesize) {
skb->truesize -= delta_truesize;
sk_wmem_queued_add(sk, -delta_truesize);
- sk_mem_uncharge(sk, delta_truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_uncharge(sk, delta_truesize);
}
/* Any change of skb->len requires recalculation of tso factor. */
@@ -2295,7 +2296,9 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
+ if (unlikely(TCP_SKB_CB(skb)->eor) ||
+ tcp_has_tx_tstamp(skb) ||
+ !skb_pure_zcopy_same(skb, next))
return false;
len -= skb->len;
@@ -2412,7 +2415,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
tcp_skb_collapse_tstamp(nskb, skb);
tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_wmem_free_skb(sk, skb);
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);