summaryrefslogtreecommitdiff
path: root/net/core/skbuff.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--net/core/skbuff.c445
1 files changed, 265 insertions, 180 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6841e61a6bd0..85fc82f72d26 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,11 +64,13 @@
#include <linux/mpls.h>
#include <linux/kcov.h>
#include <linux/iov_iter.h>
+#include <linux/crc32.h>
#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
+#include <net/gro.h>
#include <net/gso.h>
#include <net/hotdata.h>
#include <net/ip6_checksum.h>
@@ -88,6 +90,7 @@
#include <linux/textsearch.h>
#include "dev.h"
+#include "devmem.h"
#include "netmem_priv.h"
#include "sock_destructor.h"
@@ -95,7 +98,9 @@
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
+ GRO_MAX_HEAD_PAD))
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
@@ -220,67 +225,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +237,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -367,6 +297,68 @@ static struct sk_buff *napi_skb_cache_get(void)
return skb;
}
+/**
+ * napi_skb_cache_get_bulk - obtain a number of zeroed skb heads from the cache
+ * @skbs: pointer to an at least @n-sized array to fill with skb pointers
+ * @n: number of entries to provide
+ *
+ * Tries to obtain @n &sk_buff entries from the NAPI percpu cache and writes
+ * the pointers into the provided array @skbs. If there are less entries
+ * available, tries to replenish the cache and bulk-allocates the diff from
+ * the MM layer if needed.
+ * The heads are being zeroed with either memset() or %__GFP_ZERO, so they are
+ * ready for {,__}build_skb_around() and don't have any data buffers attached.
+ * Must be called *only* from the BH context.
+ *
+ * Return: number of successfully allocated skbs (@n if no actual allocation
+ * needed or kmem_cache_alloc_bulk() didn't fail).
+ */
+u32 napi_skb_cache_get_bulk(void **skbs, u32 n)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ u32 bulk, total = n;
+
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+
+ if (nc->skb_count >= n)
+ goto get;
+
+ /* No enough cached skbs. Try refilling the cache first */
+ bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK);
+ nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN, bulk,
+ &nc->skb_cache[nc->skb_count]);
+ if (likely(nc->skb_count >= n))
+ goto get;
+
+ /* Still not enough. Bulk-allocate the missing part directly, zeroed */
+ n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN,
+ n - nc->skb_count, &skbs[nc->skb_count]);
+ if (likely(nc->skb_count >= n))
+ goto get;
+
+ /* kmem_cache didn't allocate the number we need, limit the output */
+ total -= n - nc->skb_count;
+ n = nc->skb_count;
+
+get:
+ for (u32 base = nc->skb_count - n, i = 0; i < n; i++) {
+ u32 cache_size = kmem_cache_size(net_hotdata.skbuff_cache);
+
+ skbs[i] = nc->skb_cache[base + i];
+
+ kasan_mempool_unpoison_object(skbs[i], cache_size);
+ memset(skbs[i], 0, offsetof(struct sk_buff, tail));
+ }
+
+ nc->skb_count -= n;
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+
+ return total;
+}
+EXPORT_SYMBOL_GPL(napi_skb_cache_get_bulk);
+
static inline void __finalize_skb_around(struct sk_buff *skb, void *data,
unsigned int size)
{
@@ -736,7 +728,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
*/
- if (len <= SKB_WITH_OVERHEAD(1024) ||
+ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
@@ -813,10 +805,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +816,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
-
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
@@ -921,11 +895,6 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_netmem(netmem_ref netmem)
-{
- return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
-}
-
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom)
{
@@ -1009,7 +978,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_pp_cow_data);
int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
- struct bpf_prog *prog)
+ const struct bpf_prog *prog)
{
if (!prog->aux->xdp_has_frags)
return -EINVAL;
@@ -1023,14 +992,7 @@ bool napi_pp_put_page(netmem_ref netmem)
{
netmem = netmem_compound_head(netmem);
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely(!is_pp_netmem(netmem)))
+ if (unlikely(!netmem_is_pp(netmem)))
return false;
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
@@ -1070,7 +1032,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
for (i = 0; i < shinfo->nr_frags; i++) {
head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
- if (likely(is_pp_netmem(head_netmem)))
+ if (likely(netmem_is_pp(head_netmem)))
page_pool_ref_netmem(head_netmem);
else
page_ref_inc(netmem_to_page(head_netmem));
@@ -1694,7 +1656,8 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
-static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
+static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size,
+ bool devmem)
{
struct ubuf_info_msgzc *uarg;
struct sk_buff *skb;
@@ -1709,7 +1672,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
uarg = (void *)skb->cb;
uarg->mmp.user = NULL;
- if (mm_account_pinned_pages(&uarg->mmp, size)) {
+ if (likely(!devmem) && mm_account_pinned_pages(&uarg->mmp, size)) {
kfree_skb(skb);
return NULL;
}
@@ -1732,7 +1695,7 @@ static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
}
struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
- struct ubuf_info *uarg)
+ struct ubuf_info *uarg, bool devmem)
{
if (uarg) {
struct ubuf_info_msgzc *uarg_zc;
@@ -1762,7 +1725,8 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
next = (u32)atomic_read(&sk->sk_zckey);
if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
- if (mm_account_pinned_pages(&uarg_zc->mmp, size))
+ if (likely(!devmem) &&
+ mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL;
uarg_zc->len++;
uarg_zc->bytelen = bytelen;
@@ -1777,7 +1741,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
}
new_alloc:
- return msg_zerocopy_alloc(sk, size);
+ return msg_zerocopy_alloc(sk, size, devmem);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
@@ -1881,7 +1845,8 @@ EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops);
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
- struct ubuf_info *uarg)
+ struct ubuf_info *uarg,
+ struct net_devmem_dmabuf_binding *binding)
{
int err, orig_len = skb->len;
@@ -1900,7 +1865,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return -EEXIST;
}
- err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
+ err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len,
+ binding);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
struct sock *save_sk = skb->sk;
@@ -3267,7 +3233,7 @@ static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg)
typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
- int len, sendmsg_func sendmsg)
+ int len, sendmsg_func sendmsg, int flags)
{
unsigned int orig_len = len;
struct sk_buff *head = skb;
@@ -3285,7 +3251,7 @@ do_frag_list:
kv.iov_base = skb->data + offset;
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
+ msg.msg_flags = MSG_DONTWAIT | flags;
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
@@ -3322,7 +3288,8 @@ do_frag_list:
while (slen) {
struct bio_vec bvec;
struct msghdr msg = {
- .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT,
+ .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
+ flags,
};
bvec_set_page(&bvec, skb_frag_page(frag), slen,
@@ -3368,14 +3335,21 @@ error:
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len)
{
- return __skb_send_sock(sk, skb, offset, len, sendmsg_locked);
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, 0);
}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb,
+ int offset, int len, int flags)
+{
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, flags);
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked_with_flags);
+
/* Send skb data on a socket. Socket must be unlocked. */
int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
{
- return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked);
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, 0);
}
/**
@@ -3471,8 +3445,7 @@ fault:
EXPORT_SYMBOL(skb_store_bits);
/* Checksum skb data. */
-__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
- __wsum csum, const struct skb_checksum_ops *ops)
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -3483,8 +3456,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
if (copy > 0) {
if (copy > len)
copy = len;
- csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
- skb->data + offset, copy, csum);
+ csum = csum_partial(skb->data + offset, copy, csum);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -3514,13 +3486,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
skb_frag_off(frag) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
- csum2 = INDIRECT_CALL_1(ops->update,
- csum_partial_ext,
- vaddr + p_off, p_len, 0);
+ csum2 = csum_partial(vaddr + p_off, p_len, 0);
kunmap_atomic(vaddr);
- csum = INDIRECT_CALL_1(ops->combine,
- csum_block_add_ext, csum,
- csum2, pos, p_len);
+ csum = csum_block_add(csum, csum2, pos);
pos += p_len;
}
@@ -3541,10 +3509,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum csum2;
if (copy > len)
copy = len;
- csum2 = __skb_checksum(frag_iter, offset - start,
- copy, 0, ops);
- csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
- csum, csum2, pos, copy);
+ csum2 = skb_checksum(frag_iter, offset - start, copy,
+ 0);
+ csum = csum_block_add(csum, csum2, pos);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -3556,18 +3523,6 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
return csum;
}
-EXPORT_SYMBOL(__skb_checksum);
-
-__wsum skb_checksum(const struct sk_buff *skb, int offset,
- int len, __wsum csum)
-{
- const struct skb_checksum_ops ops = {
- .update = csum_partial_ext,
- .combine = csum_block_add_ext,
- };
-
- return __skb_checksum(skb, offset, len, csum, &ops);
-}
EXPORT_SYMBOL(skb_checksum);
/* Both of above in one bottle. */
@@ -3660,6 +3615,78 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+#ifdef CONFIG_NET_CRC32C
+u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ struct sk_buff *frag_iter;
+
+ if (copy > 0) {
+ copy = min(copy, len);
+ crc = crc32c(crc, skb->data + offset, copy);
+ len -= copy;
+ if (len == 0)
+ return crc;
+ offset += copy;
+ }
+
+ if (WARN_ON_ONCE(!skb_frags_readable(skb)))
+ return 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ WARN_ON(start > offset + len);
+
+ end = start + skb_frag_size(frag);
+ copy = end - offset;
+ if (copy > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
+
+ copy = min(copy, len);
+ skb_frag_foreach_page(frag,
+ skb_frag_off(frag) + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ crc = crc32c(crc, vaddr + p_off, p_len);
+ kunmap_atomic(vaddr);
+ }
+ len -= copy;
+ if (len == 0)
+ return crc;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ copy = end - offset;
+ if (copy > 0) {
+ copy = min(copy, len);
+ crc = skb_crc32c(frag_iter, offset - start, copy, crc);
+ len -= copy;
+ if (len == 0)
+ return crc;
+ offset += copy;
+ }
+ start = end;
+ }
+ BUG_ON(len);
+
+ return crc;
+}
+EXPORT_SYMBOL(skb_crc32c);
+#endif /* CONFIG_NET_CRC32C */
+
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
__sum16 sum;
@@ -3719,32 +3746,6 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(__skb_checksum_complete);
-static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
-{
- net_warn_ratelimited(
- "%s: attempt to compute crc32c without libcrc32c.ko\n",
- __func__);
- return 0;
-}
-
-static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
- int offset, int len)
-{
- net_warn_ratelimited(
- "%s: attempt to compute crc32c without libcrc32c.ko\n",
- __func__);
- return 0;
-}
-
-static const struct skb_checksum_ops default_crc32c_ops = {
- .update = warn_crc32c_csum_update,
- .combine = warn_crc32c_csum_combine,
-};
-
-const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
- &default_crc32c_ops;
-EXPORT_SYMBOL(crc32c_csum_stub);
-
/**
* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
* @from: source buffer
@@ -5539,6 +5540,54 @@ err:
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb,
+ struct skb_shared_hwtstamps *hwtstamps,
+ int tstype)
+{
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ return skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP;
+ case SCM_TSTAMP_SND:
+ return skb_shinfo(skb)->tx_flags & (hwtstamps ? SKBTX_HW_TSTAMP_NOBPF :
+ SKBTX_SW_TSTAMP);
+ case SCM_TSTAMP_ACK:
+ return TCP_SKB_CB(skb)->txstamp_ack & TSTAMP_ACK_SK;
+ case SCM_TSTAMP_COMPLETION:
+ return skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP;
+ }
+
+ return false;
+}
+
+static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb,
+ struct skb_shared_hwtstamps *hwtstamps,
+ struct sock *sk,
+ int tstype)
+{
+ int op;
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ op = BPF_SOCK_OPS_TSTAMP_SCHED_CB;
+ break;
+ case SCM_TSTAMP_SND:
+ if (hwtstamps) {
+ op = BPF_SOCK_OPS_TSTAMP_SND_HW_CB;
+ *skb_hwtstamps(skb) = *hwtstamps;
+ } else {
+ op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB;
+ }
+ break;
+ case SCM_TSTAMP_ACK:
+ op = BPF_SOCK_OPS_TSTAMP_ACK_CB;
+ break;
+ default:
+ return;
+ }
+
+ bpf_skops_tx_timestamping(sk, skb, op);
+}
+
void __skb_tstamp_tx(struct sk_buff *orig_skb,
const struct sk_buff *ack_skb,
struct skb_shared_hwtstamps *hwtstamps,
@@ -5551,6 +5600,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
+ if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
+ skb_tstamp_tx_report_bpf_timestamping(orig_skb, hwtstamps,
+ sk, tstype);
+
+ if (!skb_tstamp_tx_report_so_timestamping(orig_skb, hwtstamps, tstype))
+ return;
+
tsflags = READ_ONCE(sk->sk_tsflags);
if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
@@ -6123,11 +6179,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->offload_fwd_mark = 0;
skb->offload_l3_fwd_mark = 0;
#endif
+ ipvs_reset(skb);
if (!xnet)
return;
- ipvs_reset(skb);
skb->mark = 0;
skb_clear_tstamp(skb);
}
@@ -7290,3 +7346,32 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
return false;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
+
+void get_netmem(netmem_ref netmem)
+{
+ struct net_iov *niov;
+
+ if (netmem_is_net_iov(netmem)) {
+ niov = netmem_to_net_iov(netmem);
+ if (net_is_devmem_iov(niov))
+ net_devmem_get_net_iov(netmem_to_net_iov(netmem));
+ return;
+ }
+ get_page(netmem_to_page(netmem));
+}
+EXPORT_SYMBOL(get_netmem);
+
+void put_netmem(netmem_ref netmem)
+{
+ struct net_iov *niov;
+
+ if (netmem_is_net_iov(netmem)) {
+ niov = netmem_to_net_iov(netmem);
+ if (net_is_devmem_iov(niov))
+ net_devmem_put_net_iov(netmem_to_net_iov(netmem));
+ return;
+ }
+
+ put_page(netmem_to_page(netmem));
+}
+EXPORT_SYMBOL(put_netmem);