summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c5
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c5
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c22
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c4
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c50
-rw-r--r--drivers/net/team/team_core.c4
-rw-r--r--drivers/s390/net/qeth_core_main.c8
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--net/core/dev.c17
-rw-r--r--net/core/skbuff.c103
-rw-r--r--net/ipv6/mcast.c14
-rw-r--r--net/vmw_vsock/af_vsock.c8
-rw-r--r--tools/testing/vsock/vsock_test.c41
15 files changed, 221 insertions, 65 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 5f9535f4f196..b182021f4906 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16508,6 +16508,7 @@ F: include/linux/netdev*
F: include/linux/netlink.h
F: include/linux/netpoll.h
F: include/linux/rtnetlink.h
+F: include/linux/sctp.h
F: include/linux/seq_file_net.h
F: include/linux/skbuff*
F: include/net/
@@ -16524,6 +16525,7 @@ F: include/uapi/linux/netdev*
F: include/uapi/linux/netlink.h
F: include/uapi/linux/netlink_diag.h
F: include/uapi/linux/rtnetlink.h
+F: include/uapi/linux/sctp.h
F: lib/net_utils.c
F: lib/random32.c
F: net/
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index b4fbb99bfad2..a3d6b8f198a8 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -2159,8 +2159,13 @@ static int idpf_open(struct net_device *netdev)
idpf_vport_ctrl_lock(netdev);
vport = idpf_netdev_to_vport(netdev);
+ err = idpf_set_real_num_queues(vport);
+ if (err)
+ goto unlock;
+
err = idpf_vport_open(vport);
+unlock:
idpf_vport_ctrl_unlock(netdev);
return err;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 2fa9c36e33c9..9be6a6b59c4e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3008,8 +3008,6 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb,
return -EINVAL;
rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len);
- if (unlikely(rsc_segments == 1))
- return 0;
NAPI_GRO_CB(skb)->count = rsc_segments;
skb_shinfo(skb)->gso_size = rsc_seg_len;
@@ -3072,6 +3070,7 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
idpf_rx_hash(rxq, skb, rx_desc, decoded);
skb->protocol = eth_type_trans(skb, rxq->netdev);
+ skb_record_rx_queue(skb, rxq->idx);
if (le16_get_bits(rx_desc->hdrlen_flags,
VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
@@ -3080,8 +3079,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
csum_bits = idpf_rx_splitq_extract_csum_bits(rx_desc);
idpf_rx_csum(rxq, skb, csum_bits, decoded);
- skb_record_rx_queue(skb, rxq->idx);
-
return 0;
}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 56a35d58e7a6..84307bb7313e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1096,6 +1096,7 @@ static int igc_init_empty_frame(struct igc_ring *ring,
return -ENOMEM;
}
+ buffer->type = IGC_TX_BUFFER_TYPE_SKB;
buffer->skb = skb;
buffer->protocol = 0;
buffer->bytecount = skb->len;
@@ -2701,8 +2702,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
}
static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
- struct xdp_buff *xdp)
+ struct igc_xdp_buff *ctx)
{
+ struct xdp_buff *xdp = &ctx->xdp;
unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
struct sk_buff *skb;
@@ -2721,27 +2723,28 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
__skb_pull(skb, metasize);
}
+ if (ctx->rx_ts) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
+ skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
+ }
+
return skb;
}
static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
union igc_adv_rx_desc *desc,
- struct xdp_buff *xdp,
- ktime_t timestamp)
+ struct igc_xdp_buff *ctx)
{
struct igc_ring *ring = q_vector->rx.ring;
struct sk_buff *skb;
- skb = igc_construct_skb_zc(ring, xdp);
+ skb = igc_construct_skb_zc(ring, ctx);
if (!skb) {
ring->rx_stats.alloc_failed++;
set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags);
return;
}
- if (timestamp)
- skb_hwtstamps(skb)->hwtstamp = timestamp;
-
if (igc_cleanup_headers(ring, desc, skb))
return;
@@ -2777,7 +2780,6 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
union igc_adv_rx_desc *desc;
struct igc_rx_buffer *bi;
struct igc_xdp_buff *ctx;
- ktime_t timestamp = 0;
unsigned int size;
int res;
@@ -2807,6 +2809,8 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
*/
bi->xdp->data_meta += IGC_TS_HDR_LEN;
size -= IGC_TS_HDR_LEN;
+ } else {
+ ctx->rx_ts = NULL;
}
bi->xdp->data_end = bi->xdp->data + size;
@@ -2815,7 +2819,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
switch (res) {
case IGC_XDP_PASS:
- igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
+ igc_dispatch_skb_zc(q_vector, desc, ctx);
fallthrough;
case IGC_XDP_CONSUMED:
xsk_buff_free(bi->xdp);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7236f20c9a30..467f81239e12 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2105,7 +2105,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
/* hand second half of page back to the ring */
ixgbe_reuse_rx_page(rx_ring, rx_buffer);
} else {
- if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
+ if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) {
/* the page has been released from the ring */
IXGBE_CB(skb)->page_released = true;
} else {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 2bed8c86b7cf..3f64cdbabfa3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -768,7 +768,9 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev,
err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp);
if (err)
return;
- mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
+ err = mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
+ if (err)
+ return;
for (i = 0; i < len; i++) {
data[data_index + i] = hw_stats[i].getter(ppcnt_pl);
if (!hw_stats[i].cells_bytes)
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index b663271e79f7..2806238629f8 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -828,21 +828,30 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma)
{
struct am65_cpsw_tx_chn *tx_chn = data;
+ enum am65_cpsw_tx_buf_type buf_type;
struct cppi5_host_desc_t *desc_tx;
+ struct xdp_frame *xdpf;
struct sk_buff *skb;
void **swdata;
desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
swdata = cppi5_hdesc_get_swdata(desc_tx);
- skb = *(swdata);
- am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
+ buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma);
+ if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) {
+ skb = *(swdata);
+ dev_kfree_skb_any(skb);
+ } else {
+ xdpf = *(swdata);
+ xdp_return_frame(xdpf);
+ }
- dev_kfree_skb_any(skb);
+ am65_cpsw_nuss_xmit_free(tx_chn, desc_tx);
}
static struct sk_buff *am65_cpsw_build_skb(void *page_addr,
struct net_device *ndev,
- unsigned int len)
+ unsigned int len,
+ unsigned int headroom)
{
struct sk_buff *skb;
@@ -852,7 +861,7 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr,
if (unlikely(!skb))
return NULL;
- skb_reserve(skb, AM65_CPSW_HEADROOM);
+ skb_reserve(skb, headroom);
skb->dev = ndev;
return skb;
@@ -1169,9 +1178,11 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
struct xdp_frame *xdpf;
struct bpf_prog *prog;
struct page *page;
+ int pkt_len;
u32 act;
int err;
+ pkt_len = *len;
prog = READ_ONCE(port->xdp_prog);
if (!prog)
return AM65_CPSW_XDP_PASS;
@@ -1189,8 +1200,10 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
netif_txq = netdev_get_tx_queue(ndev, tx_chn->id);
xdpf = xdp_convert_buff_to_frame(xdp);
- if (unlikely(!xdpf))
+ if (unlikely(!xdpf)) {
+ ndev->stats.tx_dropped++;
goto drop;
+ }
__netif_tx_lock(netif_txq, cpu);
err = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf,
@@ -1199,14 +1212,14 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
if (err)
goto drop;
- dev_sw_netstats_tx_add(ndev, 1, *len);
+ dev_sw_netstats_rx_add(ndev, pkt_len);
ret = AM65_CPSW_XDP_CONSUMED;
goto out;
case XDP_REDIRECT:
if (unlikely(xdp_do_redirect(ndev, xdp, prog)))
goto drop;
- dev_sw_netstats_rx_add(ndev, *len);
+ dev_sw_netstats_rx_add(ndev, pkt_len);
ret = AM65_CPSW_XDP_REDIRECT;
goto out;
default:
@@ -1315,16 +1328,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info);
dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE);
-
k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
- skb = am65_cpsw_build_skb(page_addr, ndev,
- AM65_CPSW_MAX_PACKET_SIZE);
- if (unlikely(!skb)) {
- new_page = page;
- goto requeue;
- }
-
if (port->xdp_prog) {
xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]);
xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM,
@@ -1334,9 +1339,16 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
if (*xdp_state != AM65_CPSW_XDP_PASS)
goto allocate;
- /* Compute additional headroom to be reserved */
- headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb);
- skb_reserve(skb, headroom);
+ headroom = xdp.data - xdp.data_hard_start;
+ } else {
+ headroom = AM65_CPSW_HEADROOM;
+ }
+
+ skb = am65_cpsw_build_skb(page_addr, ndev,
+ AM65_CPSW_MAX_PACKET_SIZE, headroom);
+ if (unlikely(!skb)) {
+ new_page = page;
+ goto requeue;
}
ndev_priv = netdev_priv(ndev);
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c
index dc7cbd6a9798..f4019815f473 100644
--- a/drivers/net/team/team_core.c
+++ b/drivers/net/team/team_core.c
@@ -2639,7 +2639,9 @@ int team_nl_options_set_doit(struct sk_buff *skb, struct genl_info *info)
ctx.data.u32_val = nla_get_u32(attr_data);
break;
case TEAM_OPTION_TYPE_STRING:
- if (nla_len(attr_data) > TEAM_STRING_MAX_LEN) {
+ if (nla_len(attr_data) > TEAM_STRING_MAX_LEN ||
+ !memchr(nla_data(attr_data), '\0',
+ nla_len(attr_data))) {
err = -EINVAL;
goto team_put;
}
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a3adaec5504e..20328d695ef9 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -7050,14 +7050,16 @@ int qeth_open(struct net_device *dev)
card->data.state = CH_STATE_UP;
netif_tx_start_all_queues(dev);
- local_bh_disable();
qeth_for_each_output_queue(card, queue, i) {
netif_napi_add_tx(dev, &queue->napi, qeth_tx_poll);
napi_enable(&queue->napi);
- napi_schedule(&queue->napi);
}
-
napi_enable(&card->napi);
+
+ local_bh_disable();
+ qeth_for_each_output_queue(card, queue, i) {
+ napi_schedule(&queue->napi);
+ }
napi_schedule(&card->napi);
/* kick-start the NAPI softirq: */
local_bh_enable();
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 365f0e2098d1..c0a86afb85da 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,6 +4115,7 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
+void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index 55e356a68db6..b91658e8aedb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,23 +6920,6 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-static void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6a99c453397f..a441613a1e6c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,9 +220,67 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
+#if PAGE_SIZE == SZ_4K
+
+#define NAPI_HAS_SMALL_PAGE_FRAG 1
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
+
+/* specialized page frag allocator using a single order 0 page
+ * and slicing it into 1K sized fragment. Constrained to systems
+ * with a very limited amount of 1K fragments fitting a single
+ * page - to avoid excessive truesize underestimation
+ */
+
+struct page_frag_1k {
+ void *va;
+ u16 offset;
+ bool pfmemalloc;
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
+{
+ struct page *page;
+ int offset;
+
+ offset = nc->offset - SZ_1K;
+ if (likely(offset >= 0))
+ goto use_frag;
+
+ page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+ if (!page)
+ return NULL;
+
+ nc->va = page_address(page);
+ nc->pfmemalloc = page_is_pfmemalloc(page);
+ offset = PAGE_SIZE - SZ_1K;
+ page_ref_add(page, offset / SZ_1K);
+
+use_frag:
+ nc->offset = offset;
+ return nc->va + offset;
+}
+#else
+
+/* the small page is actually unused in this build; add dummy helpers
+ * to please the compiler and avoid later preprocessor's conditionals
+ */
+#define NAPI_HAS_SMALL_PAGE_FRAG 0
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
+
+struct page_frag_1k {
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
+{
+ return NULL;
+}
+
+#endif
+
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
+ struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -232,6 +290,23 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -738,8 +813,10 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
+ * When the small frag allocator is available, prefer it over kmalloc
+ * for small fragments
*/
- if (len <= SKB_WITH_OVERHEAD(1024) ||
+ if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -749,16 +826,32 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
- len = SKB_HEAD_ALIGN(len);
-
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
+ if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
+ /* we are artificially inflating the allocation size, but
+ * that is not as bad as it may look like, as:
+ * - 'len' less than GRO_MAX_HEAD makes little sense
+ * - On most systems, larger 'len' values lead to fragment
+ * size above 512 bytes
+ * - kmalloc would use the kmalloc-1k slab for such values
+ * - Builds with smaller GRO_MAX_HEAD will very likely do
+ * little networking, as that implies no WiFi and no
+ * tunnels support, and 32 bits arches.
+ */
+ len = SZ_1K;
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+ data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
+ pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
+ } else {
+ len = SKB_HEAD_ALIGN(len);
+
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+ }
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 81a739ebf709..65831b4fee1f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1773,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
struct net_device *dev = idev->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct net *net = dev_net(dev);
const struct in6_addr *saddr;
struct in6_addr addr_buf;
struct mld2_report *pmr;
struct sk_buff *skb;
unsigned int size;
struct sock *sk;
- int err;
+ struct net *net;
- sk = net->ipv6.igmp_sk;
/* we assume size > sizeof(ra) here
* Also try to not allocate high-order pages for big MTU
*/
size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
- skb = sock_alloc_send_skb(sk, size, 1, &err);
+ skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return NULL;
@@ -1795,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
@@ -1806,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ rcu_read_unlock();
+
skb_put_data(skb, ra, sizeof(ra));
skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 075695173648..53a081d49d28 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -824,13 +824,19 @@ static void __vsock_release(struct sock *sk, int level)
*/
lock_sock_nested(sk, level);
- sock_orphan(sk);
+ /* Indicate to vsock_remove_sock() that the socket is being released and
+ * can be removed from the bound_table. Unlike transport reassignment
+ * case, where the socket must remain bound despite vsock_remove_sock()
+ * being called from the transport release() callback.
+ */
+ sock_set_flag(sk, SOCK_DEAD);
if (vsk->transport)
vsk->transport->release(vsk);
else if (sock_type_connectible(sk->sk_type))
vsock_remove_sock(vsk);
+ sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
skb_queue_purge(&sk->sk_receive_queue);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dfff8b288265..d0f6d253ac72 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
close(fd);
}
+static void test_stream_linger_client(const struct test_opts *opts)
+{
+ struct linger optval = {
+ .l_onoff = 1,
+ .l_linger = 1
+ };
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+ perror("setsockopt(SO_LINGER)");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_connect_retry_client,
.run_server = test_stream_connect_retry_server,
},
+ {
+ .name = "SOCK_STREAM SO_LINGER null-ptr-deref",
+ .run_client = test_stream_linger_client,
+ .run_server = test_stream_linger_server,
+ },
{},
};