diff options
119 files changed, 1248 insertions, 713 deletions
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5d603ef39bad..b88c63fbf7fb 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2527,10 +2527,17 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev, } btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi"); - err = request_firmware(&fw, fwname, &hdev->dev); + err = firmware_request_nowarn(&fw, fwname, &hdev->dev); if (err < 0) { + if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { + /* Firmware has already been loaded */ + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + return 0; + } + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", fwname, err); + return err; } @@ -2680,12 +2687,24 @@ download: err = btusb_setup_intel_new_get_fw_name(ver, params, fwname, sizeof(fwname), "sfi"); if (err < 0) { + if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { + /* Firmware has already been loaded */ + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + return 0; + } + bt_dev_err(hdev, "Unsupported Intel firmware naming"); return -EINVAL; } - err = request_firmware(&fw, fwname, &hdev->dev); + err = firmware_request_nowarn(&fw, fwname, &hdev->dev); if (err < 0) { + if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) { + /* Firmware has already been loaded */ + set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); + return 0; + } + bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)", fwname, err); return err; diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index d17482395a4d..4ffbfd534f18 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty) rtnl_lock(); result = register_netdevice(dev); if (result) { + tty_kref_put(tty); rtnl_unlock(); free_netdev(dev); return -ENODEV; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 881f88754bf6..52571486705e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget) static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, struct ena_tx_buffer *tx_info, struct xdp_frame *xdpf, - void **push_hdr, - u32 *push_len) + struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_adapter *adapter = xdp_ring->adapter; struct ena_com_buf *ena_buf; - dma_addr_t dma = 0; + int push_len = 0; + dma_addr_t dma; + void *data; u32 size; tx_info->xdpf = xdpf; + data = tx_info->xdpf->data; size = tx_info->xdpf->len; - ena_buf = tx_info->bufs; - /* llq push buffer */ - *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); - *push_hdr = tx_info->xdpf->data; + if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* Designate part of the packet for LLQ */ + push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + + ena_tx_ctx->push_header = data; + + size -= push_len; + data += push_len; + } + + ena_tx_ctx->header_len = push_len; - if (size - *push_len > 0) { + if (size > 0) { dma = dma_map_single(xdp_ring->dev, - *push_hdr + *push_len, - size - *push_len, + data, + size, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) goto error_report_dma_error; - tx_info->map_linear_data = 1; - tx_info->num_of_bufs = 1; - } + tx_info->map_linear_data = 0; - ena_buf->paddr = dma; - ena_buf->len = size; + ena_buf = tx_info->bufs; + ena_buf->paddr = dma; + ena_buf->len = size; + + ena_tx_ctx->ena_bufs = ena_buf; + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; + } return 0; @@ -274,10 +286,6 @@ error_report_dma_error: &xdp_ring->syncp); netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); - xdp_return_frame_rx_napi(tx_info->xdpf); - tx_info->xdpf = NULL; - tx_info->num_of_bufs = 0; - return -EINVAL; } @@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, struct ena_com_tx_ctx ena_tx_ctx = {}; struct ena_tx_buffer *tx_info; u16 next_to_use, req_id; - void *push_hdr; - u32 push_len; int rc; next_to_use = xdp_ring->next_to_use; @@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, tx_info = &xdp_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0; - rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len); + rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); if (unlikely(rc)) return rc; - ena_tx_ctx.ena_bufs = tx_info->bufs; - ena_tx_ctx.push_header = push_hdr; - ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; - ena_tx_ctx.header_len = push_len; rc = ena_xmit_common(dev, xdp_ring, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index b3d74332ed33..7748b276e5fd 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1849,6 +1849,7 @@ out_free_netdev: free_netdev(netdev); out_pci_release: pci_release_mem_regions(pdev); + pci_disable_pcie_error_reporting(pdev); out_pci_disable: pci_disable_device(pdev); return err; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 314f8d806723..9058f09f921e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -2177,8 +2177,6 @@ int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, bool persistent, u8 *smt_idx); int cxgb4_get_msix_idx_from_bmap(struct adapter *adap); void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx); -int cxgb_open(struct net_device *dev); -int cxgb_close(struct net_device *dev); void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q); void cxgb4_quiesce_rx(struct sge_rspq *q); int cxgb4_port_mirror_alloc(struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 61ea3ec5c3fc..83ed10ac8660 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -1337,13 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev, return ret; } - spin_lock_bh(&adap->win0_lock); + /* We have to RESET the chip/firmware because we need the + * chip in uninitialized state for loading new PHY image. + * Otherwise, the running firmware will only store the PHY + * image in local RAM which will be lost after next reset. + */ + ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F); + if (ret < 0) { + dev_err(adap->pdev_dev, + "Set FW to RESET for flashing PHY FW failed. ret: %d\n", + ret); + return ret; + } + ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size); - spin_unlock_bh(&adap->win0_lock); - if (ret) - dev_err(adap->pdev_dev, "Failed to load PHY FW\n"); + if (ret < 0) { + dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n", + ret); + return ret; + } - return ret; + return 0; } static int cxgb4_ethtool_flash_fw(struct net_device *netdev, @@ -1610,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap, u32 ftid) { struct tid_info *t = &adap->tids; - struct filter_entry *f; - if (ftid < t->nhpftids) - f = &adap->tids.hpftid_tab[ftid]; - else if (ftid < t->nftids) - f = &adap->tids.ftid_tab[ftid - t->nhpftids]; - else - f = lookup_tid(&adap->tids, ftid); + if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids) + return &t->hpftid_tab[ftid - t->hpftid_base]; - return f; + if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids) + return &t->ftid_tab[ftid - t->ftid_base]; + + return lookup_tid(t, ftid); } static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs, @@ -1826,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev, filter_id = filter_info->loc_array[cmd->fs.location]; f = cxgb4_get_filter_entry(adapter, filter_id); + if (f->fs.prio) + filter_id -= adapter->tids.hpftid_base; + else if (!f->fs.hash) + filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids); + ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id); if (ret) goto err; @@ -1885,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev, filter_info = &adapter->ethtool_filters->port[pi->port_id]; + if (fs.prio) + tid += adapter->tids.hpftid_base; + else if (!fs.hash) + tid += (adapter->tids.ftid_base - adapter->tids.nhpftids); + filter_info->loc_array[cmd->fs.location] = tid; set_bit(cmd->fs.location, filter_info->bmap); filter_info->in_use++; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 421bd9b88028..762113a04dde 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2834,7 +2834,7 @@ static void cxgb_down(struct adapter *adapter) /* * net_device operations */ -int cxgb_open(struct net_device *dev) +static int cxgb_open(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -2882,7 +2882,7 @@ out_unlock: return err; } -int cxgb_close(struct net_device *dev) +static int cxgb_close(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; @@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap) /* Load PHY Firmware onto adapter. */ - spin_lock_bh(&adap->win0_lock); ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version, (u8 *)phyf->data, phyf->size); - spin_unlock_bh(&adap->win0_lock); if (ret < 0) dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n", -ret); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c index 6c259de96f96..338b04f339b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c @@ -589,7 +589,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev, * down before configuring tc params. */ if (netif_running(dev)) { - cxgb_close(dev); + netif_tx_stop_all_queues(dev); + netif_carrier_off(dev); needs_bring_up = true; } @@ -615,8 +616,10 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev, } out: - if (needs_bring_up) - cxgb_open(dev); + if (needs_bring_up) { + netif_tx_start_all_queues(dev); + netif_carrier_on(dev); + } mutex_unlock(&adap->tc_mqprio->mqprio_mutex); return ret; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 1e5f2edb70cf..6a099cb34b12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2556,6 +2556,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc) if (!eosw_txq) return -ENOMEM; + if (!(adap->flags & CXGB4_FW_OK)) { + /* Don't stall caller when access to FW is lost */ + complete(&eosw_txq->completion); + return -EIO; + } + skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 9428ef1f04a8..a0555f4d76fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr, * @addr: the start address to write * @n: length of data to write in bytes * @data: the data to write + * @byte_oriented: whether to store data as bytes or as words * * Writes up to a page of data (256 bytes) to the serial flash starting * at the given address. All the data must be written to the same page. + * If @byte_oriented is set the write data is stored as byte stream + * (i.e. matches what on disk), otherwise in big-endian. */ static int t4_write_flash(struct adapter *adapter, unsigned int addr, - unsigned int n, const u8 *data) + unsigned int n, const u8 *data, bool byte_oriented) { - int ret; - u32 buf[64]; unsigned int i, c, left, val, offset = addr & 0xff; + u32 buf[64]; + int ret; if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE) return -EINVAL; @@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, (ret = sf1_write(adapter, 4, 1, 1, val)) != 0) goto unlock; - for (left = n; left; left -= c) { + for (left = n; left; left -= c, data += c) { c = min(left, 4U); - for (val = 0, i = 0; i < c; ++i) - val = (val << 8) + *data++; + for (val = 0, i = 0; i < c; ++i) { + if (byte_oriented) + val = (val << 8) + data[i]; + else + val = (val << 8) + data[c - i - 1]; + } ret = sf1_write(adapter, c, c != left, 1, val); if (ret) @@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */ /* Read the page to verify the write succeeded */ - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, + byte_oriented); if (ret) return ret; @@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true); if (ret) goto out; @@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true); if (ret) goto out; } - ret = t4_write_flash(adap, - fw_start + offsetof(struct fw_hdr, fw_ver), - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); + ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver), + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, + true); out: if (ret) dev_err(adap->pdev_dev, "firmware download failed, error %d\n", @@ -3812,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win, /* Copy the supplied PHY Firmware image to the adapter memory location * allocated by the adapter firmware. */ + spin_lock_bh(&adap->win0_lock); ret = t4_memory_rw(adap, win, mtype, maddr, phy_fw_size, (__be32 *)phy_fw_data, T4_MEMORY_WRITE); + spin_unlock_bh(&adap->win0_lock); if (ret) return ret; @@ -10208,7 +10218,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) n = size - i; else n = SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, true); if (ret) goto out; @@ -10677,13 +10687,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data, for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; boot_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, + false); if (ret) goto out; } ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, - (const u8 *)header); + (const u8 *)header, false); out: if (ret) @@ -10758,7 +10769,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) for (i = 0; i < size; i += SF_PAGE_SIZE) { n = min_t(u32, size - i, SF_PAGE_SIZE); - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, false); if (ret) goto out; @@ -10770,7 +10781,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) for (i = 0; i < npad; i++) { u8 data = 0; - ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data); + ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data, + false); if (ret) goto out; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index d9ddd0bcf65f..99301ad95290 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1773,49 +1773,6 @@ ice_phy_type_to_ethtool(struct net_device *netdev, ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, 100000baseKR4_Full); } - - /* Autoneg PHY types */ - if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX || - phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T || - phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX || - phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T || - phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX || - phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T || - phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR || - phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T || - phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S || - phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1 || - phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 || - phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - Autoneg); - } - if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 || - phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 || - phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP || - phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - Autoneg); - } - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 || - phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 || - phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 || - phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) { - ethtool_link_ksettings_add_link_mode(ks, supported, - Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - Autoneg); - } } #define TEST_SET_BITS_TIMEOUT 50 @@ -1972,9 +1929,7 @@ ice_get_link_ksettings(struct net_device *netdev, ks->base.port = PORT_TP; break; case ICE_MEDIA_BACKPLANE: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); ethtool_link_ksettings_add_link_mode(ks, advertising, Backplane); ks->base.port = PORT_NONE; @@ -2049,6 +2004,12 @@ ice_get_link_ksettings(struct net_device *netdev, if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + /* Set supported and advertised autoneg */ + if (ice_is_phy_caps_an_enabled(caps)) { + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + } + done: kfree(caps); return err; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index de38a0fc9665..9b8300d4a267 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -31,6 +31,7 @@ #define PF_FW_ATQLEN_ATQOVFL_M BIT(29) #define PF_FW_ATQLEN_ATQCRIT_M BIT(30) #define VF_MBX_ARQLEN(_VF) (0x0022BC00 + ((_VF) * 4)) +#define VF_MBX_ATQLEN(_VF) (0x0022A800 + ((_VF) * 4)) #define PF_FW_ATQLEN_ATQENABLE_M BIT(31) #define PF_FW_ATQT 0x00080400 #define PF_MBX_ARQBAH 0x0022E400 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7f7653906fce..27f9dac8719c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -200,6 +200,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; + if (vf->num_req_qs) + vf->num_vf_qs = vf->num_req_qs; vsi->alloc_txq = vf->num_vf_qs; vsi->alloc_rxq = vf->num_vf_qs; /* pf->num_msix_per_vf includes (VF miscellaneous vector + @@ -1715,12 +1717,13 @@ setup_rings: * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured * @rings: Tx ring array to be configured + * @count: number of Tx ring array elements * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. */ static int -ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) { struct ice_aqc_add_tx_qgrp *qg_buf; u16 q_idx = 0; @@ -1732,7 +1735,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) qg_buf->num_txqs = 1; - for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); if (err) goto err_cfg_txqs; @@ -1752,7 +1755,7 @@ err_cfg_txqs: */ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) { - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq); } /** @@ -1767,7 +1770,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) int ret; int i; - ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq); if (ret) return ret; @@ -2007,17 +2010,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi) * @rst_src: reset source * @rel_vmvf_num: Relative ID of VF/VM * @rings: Tx ring array to be stopped + * @count: number of Tx ring array elements */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings) + u16 rel_vmvf_num, struct ice_ring **rings, u16 count) { u16 q_idx; if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) return -EINVAL; - for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { struct ice_txq_meta txq_meta = { }; int status; @@ -2045,7 +2049,7 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) { - return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings); + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq); } /** @@ -2054,7 +2058,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, */ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) { - return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4ee85a217c6f..0eb2307325d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2556,6 +2556,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /** + * ice_xdp_safe_mode - XDP handler for safe mode + * @dev: netdevice + * @xdp: XDP command + */ +static int ice_xdp_safe_mode(struct net_device __always_unused *dev, + struct netdev_bpf *xdp) +{ + NL_SET_ERR_MSG_MOD(xdp->extack, + "Please provide working DDP firmware package in order to use XDP\n" + "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst"); + return -EOPNOTSUPP; +} + +/** * ice_xdp - implements XDP handler * @dev: netdevice * @xdp: XDP command @@ -6937,6 +6951,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp_safe_mode, }; static const struct net_device_ops ice_netdev_ops = { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 93e5d9ebfd74..04748aa4c7c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2149,6 +2149,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) struct ice_tx_offload_params offload = { 0 }; struct ice_vsi *vsi = tx_ring->vsi; struct ice_tx_buf *first; + struct ethhdr *eth; unsigned int count; int tso, csum; @@ -2195,7 +2196,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) goto out_drop; /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ - if (unlikely(skb->priority == TC_PRIO_CONTROL && + eth = (struct ethhdr *)skb_mac_header(skb); + if (unlikely((skb->priority == TC_PRIO_CONTROL || + eth->h_proto == htons(ETH_P_LLDP)) && vsi->type == ICE_VSI_PF && vsi->port_info->qos_cfg.is_sw_lldp)) offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index a1d22d2aa0bd..97a46c616aca 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -713,13 +713,15 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) */ clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - /* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it - * in the case of VFR. If this is done for PFR, it can mess up VF - * resets because the VF driver may already have started cleanup - * by the time we get here. + /* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver + * needs to clear them in the case of VFR/VFLR. If this is done for + * PFR, it can mess up VF resets because the VF driver may already + * have started cleanup by the time we get here. */ - if (!is_pfr) + if (!is_pfr) { wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0); + wr32(hw, VF_MBX_ATQLEN(vf->vf_id), 0); + } /* In the case of a VFLR, the HW has already reset the VF and we * just need to clean up, so don't hit the VFRTRIG register. @@ -1698,7 +1700,12 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) ice_vf_ctrl_vsi_release(vf); ice_vf_pre_vsi_rebuild(vf); - ice_vf_rebuild_vsi_with_release(vf); + + if (ice_vf_rebuild_vsi_with_release(vf)) { + dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id); + return false; + } + ice_vf_post_vsi_rebuild(vf); /* if the VF has been reset allow it to come up again */ diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 36dc3e5f6218..21ef2f128070 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev) static int xrx200_alloc_skb(struct xrx200_chan *ch) { + struct sk_buff *skb = ch->skb[ch->dma.desc]; dma_addr_t mapping; int ret = 0; @@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch) XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); + ch->skb[ch->dma.desc] = skb; ret = -ENOMEM; goto skip; } @@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) ch->dma.desc %= LTQ_DESC_NUM; if (ret) { - ch->skb[ch->dma.desc] = skb; net_dev->stats.rx_dropped++; netdev_err(net_dev, "failed to allocate new rx buffer\n"); return ret; @@ -352,8 +353,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr) struct xrx200_chan *ch = ptr; if (napi_schedule_prep(&ch->napi)) { - __napi_schedule(&ch->napi); ltq_dma_disable_irq(&ch->dma); + __napi_schedule(&ch->napi); } ltq_dma_ack_irq(&ch->dma); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 0dd7615e5931..bc33eaada3b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -64,6 +64,8 @@ struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) struct mlx5e_priv *priv = netdev_priv(dev); struct devlink_port *port; + if (!netif_device_present(dev)) + return NULL; port = mlx5e_devlink_get_dl_port(priv); if (port->registered) return port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index d907c1acd4d5..778e229310a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2020 Mellanox Technologies -#include <linux/ptp_classify.h> #include "en/ptp.h" #include "en/txrx.h" #include "en/params.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index ab935cce952b..c96668bd701c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -6,6 +6,7 @@ #include "en.h" #include "en_stats.h" +#include <linux/ptp_classify.h> struct mlx5e_ptpsq { struct mlx5e_txqsq txqsq; @@ -43,6 +44,27 @@ struct mlx5e_ptp { DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); }; +static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) +{ + struct flow_keys fk; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return false; + + if (fk.basic.n_proto == htons(ETH_P_1588)) + return true; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return false; + + return (fk.basic.ip_proto == IPPROTO_UDP && + fk.ports.dst == htons(PTP_EV_PORT)); +} + int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, u8 lag_port, struct mlx5e_ptp **cp); void mlx5e_ptp_close(struct mlx5e_ptp *c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index be0ee03de721..2e9bee4e5209 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) work); struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct neighbour *n = update_work->n; + struct mlx5e_encap_entry *e = NULL; bool neigh_connected, same_dev; - struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; - struct mlx5e_priv *priv; u8 nud_state, dead; rtnl_lock(); @@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) if (!same_dev) goto out; - list_for_each_entry(e, &nhe->encap_list, encap_list) { - if (!mlx5e_encap_take(e)) - continue; + /* mlx5e_get_next_init_encap() releases previous encap before returning + * the next one. + */ + while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) + mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); - priv = netdev_priv(e->out_dev); - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); - mlx5e_encap_put(priv, e); - } out: rtnl_unlock(); mlx5e_release_neigh_update_work(update_work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 311382261840..85eaadc989df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -94,13 +94,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, ASSERT_RTNL(); - /* wait for encap to be fully initialized */ - wait_for_completion(&e->res_ready); - mutex_lock(&esw->offloads.encap_tbl_lock); encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result < 0 || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) goto unlock; mlx5e_take_all_encap_flows(e, &flow_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f1fb11680d20..490131e06efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -251,9 +251,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, mlx5e_take_tmp_flow(flow, flow_list, 0); } +typedef bool (match_cb)(struct mlx5e_encap_entry *); + static struct mlx5e_encap_entry * -mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, - struct mlx5e_encap_entry *e) +mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) { struct mlx5e_encap_entry *next = NULL; @@ -288,7 +291,7 @@ retry: /* wait for encap to be fully initialized */ wait_for_completion(&next->res_ready); /* continue searching if encap entry is not in valid state after completion */ - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + if (!match(next)) { e = next; goto retry; } @@ -296,6 +299,30 @@ retry: return next; } +static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) +{ + return e->flags & MLX5_ENCAP_ENTRY_VALID; +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); +} + +static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) +{ + return e->compl_result >= 0; +} + +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 3d45341e2216..26f7fab109d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct net_device *netdev = priv->netdev; - if (!priv->ipsec) - return; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || !MLX5_CAP_ETH(mdev, swp)) { mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 5cd466ec6492..25403af32859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -356,7 +356,7 @@ err: int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { - int err = 0; + int err = -ENOMEM; int i; if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec6bafe7a2e5..d26b8ed51195 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2705,8 +2705,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; - if (priv->channels.params.ptp_rx) - num_rxqs++; mlx5e_netdev_set_tcs(netdev, nch, ntc); @@ -4824,22 +4822,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->gso_partial_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; + netdev->hw_features |= NETIF_F_GSO_GRE; + netdev->hw_enc_features |= NETIF_F_GSO_GRE; + netdev->gso_partial_features |= NETIF_F_GSO_GRE; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dd64878e5b38..d4b0f270b6bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4765,7 +4765,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { wait_for_completion(&hpe->res_ready); if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) - hpe->hp->pair->peer_gone = true; + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); mlx5e_hairpin_put(priv, hpe); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 25c091795bcd..17027536efba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 8ba62671f5f1..320fe0cda917 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -32,7 +32,6 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> -#include <linux/ptp_classify.h> #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" @@ -67,24 +66,6 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb } #endif -static bool mlx5e_use_ptpsq(struct sk_buff *skb) -{ - struct flow_keys fk; - - if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) - return false; - - if (fk.basic.n_proto == htons(ETH_P_1588)) - return true; - - if (fk.basic.n_proto != htons(ETH_P_IP) && - fk.basic.n_proto != htons(ETH_P_IPV6)) - return false; - - return (fk.basic.ip_proto == IPPROTO_UDP && - fk.ports.dst == htons(PTP_EV_PORT)); -} - static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -145,9 +126,9 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, } ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel) && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb)) + if (unlikely(ptp_channel && + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && + mlx5e_use_ptpsq(skb))) return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 77c0ca655975..940333410267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb, eqe = next_eqe_sw(eq); if (!eqe) - return 0; + goto out; do { struct mlx5_core_cq *cq; @@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb, ++eq->cons_index; } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + +out: eq_update_ci(eq, 1); if (cqn != -1) @@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb, ++eq->cons_index; } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); - eq_update_ci(eq, 1); out: + eq_update_ci(eq, 1); mlx5_eq_async_int_unlock(eq_async, recovery, &flags); return unlikely(recovery) ? num_eqes : 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 441b5453acae..540cf05f6373 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; + if (!MLX5_CAP_GEN(dev, roce)) + return; + err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 612b0ac31db2..9737565cd8d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action); static inline bool mlx5dr_is_supported(struct mlx5_core_dev *dev) { - return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || - (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && - (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_6DX)); + return MLX5_CAP_GEN(dev, roce) && + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_6DX))); } /* buddy functions & structure */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 01cc00ad8acf..b6931bbe52d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -424,6 +424,15 @@ err_modify_sq: return err; } +static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); +} + static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) { int i; @@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) for (i = 0; i < hp->num_channels; i++) mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_RST, 0, 0); - /* unset peer SQs */ - if (hp->peer_gone) - return; - for (i = 0; i < hp->num_channels; i++) - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RST, 0, 0); + if (!hp->peer_gone) + mlx5_hairpin_unpair_peer_sq(hp); } struct mlx5_hairpin * @@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp) mlx5_hairpin_destroy_queues(hp); kfree(hp); } + +void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp) +{ + int i; + + mlx5_hairpin_unpair_peer_sq(hp); + + /* destroy peer SQ */ + for (i = 0; i < hp->num_channels; i++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + + hp->peer_gone = true; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index dfea14399607..85f0ce285146 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -693,7 +693,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - NULL, 0, 0); + NULL, 0, + module_tz->parent->polling_delay); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; @@ -815,7 +816,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) MLXSW_THERMAL_TRIP_MASK, gearbox_tz, &mlxsw_thermal_gearbox_ops, - NULL, 0, 0); + NULL, 0, + gearbox_tz->parent->polling_delay); if (IS_ERR(gearbox_tz->tzdev)) return PTR_ERR(gearbox_tz->tzdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 900b4bf5bb5b..2bc5a9003c6d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3907,7 +3907,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6); #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 -#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11 static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 04672eb5c7f3..9958d503bf0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -1332,6 +1332,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, u8 band, u32 child_handle) { struct mlxsw_sp_qdisc *old_qdisc; + u32 parent; if (band < mlxsw_sp_qdisc->num_classes && mlxsw_sp_qdisc->qdiscs[band].handle == child_handle) @@ -1352,7 +1353,9 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); - mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band); + parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1); + mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, + parent); if (!WARN_ON(!mlxsw_sp_qdisc)) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 0c4283319d7f..adfb9781799e 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -379,6 +379,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port) int ocelot_port_flush(struct ocelot *ocelot, int port) { + unsigned int pause_ena; int err, val; /* Disable dequeuing from the egress queues */ @@ -387,6 +388,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port) QSYS_PORT_MODE, port); /* Disable flow control */ + ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena); ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); /* Disable priority flow control */ @@ -422,6 +424,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port) /* Clear flushing again. */ ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port); + /* Re-enable flow control */ + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena); + return err; } EXPORT_SYMBOL(ocelot_port_flush); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 7e6bac85495d..344ea1143454 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1602,6 +1602,8 @@ err_out_free_netdev: free_netdev(netdev); err_out_free_res: + if (NX_IS_REVISION_P3(pdev->revision)) + pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); err_out_disable_pdev: diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 96b947fde646..3beafc60747e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2690,6 +2690,7 @@ err_out_free_hw_res: kfree(ahw); err_out_free_res: + pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev); err_out_disable_pdev: diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 41fbd2ceeede..ab1e0fcccabb 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s) { struct rmnet_priv *priv = netdev_priv(dev); - struct rmnet_vnd_stats total_stats; + struct rmnet_vnd_stats total_stats = { }; struct rmnet_pcpu_stats *pcpu_ptr; + struct rmnet_vnd_stats snapshot; unsigned int cpu, start; - memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); - for_each_possible_cpu(cpu) { pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu); do { start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); - total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; - total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; - total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; - total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; + snapshot = pcpu_ptr->stats; /* struct assignment */ } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); - total_stats.tx_drops += pcpu_ptr->stats.tx_drops; + total_stats.rx_pkts += snapshot.rx_pkts; + total_stats.rx_bytes += snapshot.rx_bytes; + total_stats.tx_pkts += snapshot.tx_pkts; + total_stats.tx_bytes += snapshot.tx_bytes; + total_stats.tx_drops += snapshot.tx_drops; } s->rx_packets = total_stats.rx_pkts; @@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port, } return 0; -}
\ No newline at end of file +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index b70d44ac0990..3c73453725f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -76,10 +76,10 @@ enum power_event { #define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */ /* GMAC HW ADDR regs */ -#define GMAC_ADDR_HIGH(reg) (((reg > 15) ? 0x00000800 : 0x00000040) + \ - (reg * 8)) -#define GMAC_ADDR_LOW(reg) (((reg > 15) ? 0x00000804 : 0x00000044) + \ - (reg * 8)) +#define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \ + 0x00000040 + (reg * 8)) +#define GMAC_ADDR_LOW(reg) ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \ + 0x00000044 + (reg * 8)) #define GMAC_MAX_PERFECT_ADDRESSES 1 #define GMAC_PCS_BASE 0x000000c0 /* PCS register base */ diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index 0d8293a47a56..b806f2f8f859 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev) return 0; } -static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); const struct mhi_net_proto *proto = mhi_netdev->proto; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 9bd9a5c0b1db..6bbc81ad295f 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev) { int err; - err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); if (err < 0) return err; usleep_range(10, 20); - /* After reset FORCE_LINK_GOOD bit is set. Although the - * default value should be unset. Disable FORCE_LINK_GOOD - * for the phy to work properly. - */ return phy_modify(phydev, MII_DP83867_PHYCTRL, DP83867_PHYCR_FORCE_LINK_GOOD, 0); } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index b04055fd1b79..df0d1837e4ed 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1880,7 +1880,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET - | FLAG_LINK_INTR, + | FLAG_LINK_INTR | FLAG_ETHER, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6700f1970b24..bc55ec739af9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { skb->protocol = htons(ETH_P_MAP); - return (netif_rx(skb) == NET_RX_SUCCESS); + return 1; } switch (skb->data[0] & 0xf0) { diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 503e2fd7ce51..28a6c4cfe9b8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev) dev->flags = IFF_MASTER | IFF_NOARP; - /* MTU is irrelevant for VRF device; set to 64k similar to lo */ - dev->mtu = 64 * 1024; - /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; netdev_lockdep_set_classes(dev); @@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev) * which breaks networking. */ dev->min_mtu = IPV6_MIN_MTU; - dev->max_mtu = ETH_MAX_MTU; + dev->max_mtu = IP6_MAX_MTU; + dev->mtu = dev->max_mtu; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile index fc52b2cb500b..dbe1f8514efc 100644 --- a/drivers/net/wireguard/Makefile +++ b/drivers/net/wireguard/Makefile @@ -1,5 +1,4 @@ -ccflags-y := -O3 -ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG wireguard-y := main.o wireguard-y += noise.o diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 3725e9cd85f4..b7197e80f226 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -6,6 +6,8 @@ #include "allowedips.h" #include "peer.h" +static struct kmem_cache *node_cache; + static void swap_endian(u8 *dst, const u8 *src, u8 bits) { if (bits == 32) { @@ -28,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, node->bitlen = bits; memcpy(node->bits, src, bits / 8U); } -#define CHOOSE_NODE(parent, key) \ - parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] + +static inline u8 choose(struct allowedips_node *node, const u8 *key) +{ + return (key[node->bit_at_a] >> node->bit_at_b) & 1; +} static void push_rcu(struct allowedips_node **stack, struct allowedips_node __rcu *p, unsigned int *len) @@ -40,6 +45,11 @@ static void push_rcu(struct allowedips_node **stack, } } +static void node_free_rcu(struct rcu_head *rcu) +{ + kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); +} + static void root_free_rcu(struct rcu_head *rcu) { struct allowedips_node *node, *stack[128] = { @@ -49,7 +59,7 @@ static void root_free_rcu(struct rcu_head *rcu) while (len > 0 && (node = stack[--len])) { push_rcu(stack, node->bit[0], &len); push_rcu(stack, node->bit[1], &len); - kfree(node); + kmem_cache_free(node_cache, node); } } @@ -66,60 +76,6 @@ static void root_remove_peer_lists(struct allowedips_node *root) } } -static void walk_remove_by_peer(struct allowedips_node __rcu **top, - struct wg_peer *peer, struct mutex *lock) -{ -#define REF(p) rcu_access_pointer(p) -#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) -#define PUSH(p) ({ \ - WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ - stack[len++] = p; \ - }) - - struct allowedips_node __rcu **stack[128], **nptr; - struct allowedips_node *node, *prev; - unsigned int len; - - if (unlikely(!peer || !REF(*top))) - return; - - for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { - nptr = stack[len - 1]; - node = DEREF(nptr); - if (!node) { - --len; - continue; - } - if (!prev || REF(prev->bit[0]) == node || - REF(prev->bit[1]) == node) { - if (REF(node->bit[0])) - PUSH(&node->bit[0]); - else if (REF(node->bit[1])) - PUSH(&node->bit[1]); - } else if (REF(node->bit[0]) == prev) { - if (REF(node->bit[1])) - PUSH(&node->bit[1]); - } else { - if (rcu_dereference_protected(node->peer, - lockdep_is_held(lock)) == peer) { - RCU_INIT_POINTER(node->peer, NULL); - list_del_init(&node->peer_list); - if (!node->bit[0] || !node->bit[1]) { - rcu_assign_pointer(*nptr, DEREF( - &node->bit[!REF(node->bit[0])])); - kfree_rcu(node, rcu); - node = DEREF(nptr); - } - } - --len; - } - } - -#undef REF -#undef DEREF -#undef PUSH -} - static unsigned int fls128(u64 a, u64 b) { return a ? fls64(a) + 64U : fls64(b); @@ -159,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, found = node; if (node->cidr == bits) break; - node = rcu_dereference_bh(CHOOSE_NODE(node, key)); + node = rcu_dereference_bh(node->bit[choose(node, key)]); } return found; } @@ -191,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, u8 cidr, u8 bits, struct allowedips_node **rnode, struct mutex *lock) { - struct allowedips_node *node = rcu_dereference_protected(trie, - lockdep_is_held(lock)); + struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock)); struct allowedips_node *parent = NULL; bool exact = false; @@ -202,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, exact = true; break; } - node = rcu_dereference_protected(CHOOSE_NODE(parent, key), - lockdep_is_held(lock)); + node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock)); } *rnode = parent; return exact; } +static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) +{ + node->parent_bit_packed = (unsigned long)parent | bit; + rcu_assign_pointer(*parent, node); +} + +static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node) +{ + u8 bit = choose(parent, node->bits); + connect_node(&parent->bit[bit], bit, node); +} + static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, u8 cidr, struct wg_peer *peer, struct mutex *lock) { @@ -218,13 +184,13 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, return -EINVAL; if (!rcu_access_pointer(*trie)) { - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = kmem_cache_zalloc(node_cache, GFP_KERNEL); if (unlikely(!node)) return -ENOMEM; RCU_INIT_POINTER(node->peer, peer); list_add_tail(&node->peer_list, &peer->allowedips_list); copy_and_assign_cidr(node, key, cidr, bits); - rcu_assign_pointer(*trie, node); + connect_node(trie, 2, node); return 0; } if (node_placement(*trie, key, cidr, bits, &node, lock)) { @@ -233,7 +199,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, return 0; } - newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL); if (unlikely(!newnode)) return -ENOMEM; RCU_INIT_POINTER(newnode->peer, peer); @@ -243,10 +209,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, if (!node) { down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); } else { - down = rcu_dereference_protected(CHOOSE_NODE(node, key), - lockdep_is_held(lock)); + const u8 bit = choose(node, key); + down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock)); if (!down) { - rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); + connect_node(&node->bit[bit], bit, newnode); return 0; } } @@ -254,30 +220,29 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, parent = node; if (newnode->cidr == cidr) { - rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); + choose_and_connect_node(newnode, down); if (!parent) - rcu_assign_pointer(*trie, newnode); + connect_node(trie, 2, newnode); else - rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), - newnode); - } else { - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (unlikely(!node)) { - list_del(&newnode->peer_list); - kfree(newnode); - return -ENOMEM; - } - INIT_LIST_HEAD(&node->peer_list); - copy_and_assign_cidr(node, newnode->bits, cidr, bits); + choose_and_connect_node(parent, newnode); + return 0; + } - rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); - rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); - if (!parent) - rcu_assign_pointer(*trie, node); - else - rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), - node); + node = kmem_cache_zalloc(node_cache, GFP_KERNEL); + if (unlikely(!node)) { + list_del(&newnode->peer_list); + kmem_cache_free(node_cache, newnode); + return -ENOMEM; } + INIT_LIST_HEAD(&node->peer_list); + copy_and_assign_cidr(node, newnode->bits, cidr, bits); + + choose_and_connect_node(node, down); + choose_and_connect_node(node, newnode); + if (!parent) + connect_node(trie, 2, node); + else + choose_and_connect_node(parent, node); return 0; } @@ -335,9 +300,41 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, void wg_allowedips_remove_by_peer(struct allowedips *table, struct wg_peer *peer, struct mutex *lock) { + struct allowedips_node *node, *child, **parent_bit, *parent, *tmp; + bool free_parent; + + if (list_empty(&peer->allowedips_list)) + return; ++table->seq; - walk_remove_by_peer(&table->root4, peer, lock); - walk_remove_by_peer(&table->root6, peer, lock); + list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { + list_del_init(&node->peer_list); + RCU_INIT_POINTER(node->peer, NULL); + if (node->bit[0] && node->bit[1]) + continue; + child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], + lockdep_is_held(lock)); + if (child) + child->parent_bit_packed = node->parent_bit_packed; + parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); + *parent_bit = child; + parent = (void *)parent_bit - + offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); + free_parent = !rcu_access_pointer(node->bit[0]) && + !rcu_access_pointer(node->bit[1]) && + (node->parent_bit_packed & 3) <= 1 && + !rcu_access_pointer(parent->peer); + if (free_parent) + child = rcu_dereference_protected( + parent->bit[!(node->parent_bit_packed & 1)], + lockdep_is_held(lock)); + call_rcu(&node->rcu, node_free_rcu); + if (!free_parent) + continue; + if (child) + child->parent_bit_packed = parent->parent_bit_packed; + *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; + call_rcu(&parent->rcu, node_free_rcu); + } } int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) @@ -374,4 +371,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, return NULL; } +int __init wg_allowedips_slab_init(void) +{ + node_cache = KMEM_CACHE(allowedips_node, 0); + return node_cache ? 0 : -ENOMEM; +} + +void wg_allowedips_slab_uninit(void) +{ + rcu_barrier(); + kmem_cache_destroy(node_cache); +} + #include "selftest/allowedips.c" diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h index e5c83cafcef4..2346c797eb4d 100644 --- a/drivers/net/wireguard/allowedips.h +++ b/drivers/net/wireguard/allowedips.h @@ -15,14 +15,11 @@ struct wg_peer; struct allowedips_node { struct wg_peer __rcu *peer; struct allowedips_node __rcu *bit[2]; - /* While it may seem scandalous that we waste space for v4, - * we're alloc'ing to the nearest power of 2 anyway, so this - * doesn't actually make a difference. - */ - u8 bits[16] __aligned(__alignof(u64)); u8 cidr, bit_at_a, bit_at_b, bitlen; + u8 bits[16] __aligned(__alignof(u64)); - /* Keep rarely used list at bottom to be beyond cache line. */ + /* Keep rarely used members at bottom to be beyond cache line. */ + unsigned long parent_bit_packed; union { struct list_head peer_list; struct rcu_head rcu; @@ -33,7 +30,7 @@ struct allowedips { struct allowedips_node __rcu *root4; struct allowedips_node __rcu *root6; u64 seq; -}; +} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */ void wg_allowedips_init(struct allowedips *table); void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); @@ -56,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, bool wg_allowedips_selftest(void); #endif +int wg_allowedips_slab_init(void); +void wg_allowedips_slab_uninit(void); + #endif /* _WG_ALLOWEDIPS_H */ diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 7a7d5f1a80fc..75dbe77b0b4b 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -21,13 +21,22 @@ static int __init mod_init(void) { int ret; + ret = wg_allowedips_slab_init(); + if (ret < 0) + goto err_allowedips; + #ifdef DEBUG + ret = -ENOTRECOVERABLE; if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || !wg_ratelimiter_selftest()) - return -ENOTRECOVERABLE; + goto err_peer; #endif wg_noise_init(); + ret = wg_peer_init(); + if (ret < 0) + goto err_peer; + ret = wg_device_init(); if (ret < 0) goto err_device; @@ -44,6 +53,10 @@ static int __init mod_init(void) err_netlink: wg_device_uninit(); err_device: + wg_peer_uninit(); +err_peer: + wg_allowedips_slab_uninit(); +err_allowedips: return ret; } @@ -51,6 +64,8 @@ static void __exit mod_exit(void) { wg_genetlink_uninit(); wg_device_uninit(); + wg_peer_uninit(); + wg_allowedips_slab_uninit(); } module_init(mod_init); diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c index cd5cb0292cb6..1acd00ab2fbc 100644 --- a/drivers/net/wireguard/peer.c +++ b/drivers/net/wireguard/peer.c @@ -15,6 +15,7 @@ #include <linux/rcupdate.h> #include <linux/list.h> +static struct kmem_cache *peer_cache; static atomic64_t peer_counter = ATOMIC64_INIT(0); struct wg_peer *wg_peer_create(struct wg_device *wg, @@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, if (wg->num_peers >= MAX_PEERS_PER_DEVICE) return ERR_PTR(ret); - peer = kzalloc(sizeof(*peer), GFP_KERNEL); + peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL); if (unlikely(!peer)) return ERR_PTR(ret); - if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) + if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))) goto err; peer->device = wg; @@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, return peer; err: - kfree(peer); + kmem_cache_free(peer_cache, peer); return ERR_PTR(ret); } @@ -88,7 +89,7 @@ static void peer_make_dead(struct wg_peer *peer) /* Mark as dead, so that we don't allow jumping contexts after. */ WRITE_ONCE(peer->is_dead, true); - /* The caller must now synchronize_rcu() for this to take effect. */ + /* The caller must now synchronize_net() for this to take effect. */ } static void peer_remove_after_dead(struct wg_peer *peer) @@ -160,7 +161,7 @@ void wg_peer_remove(struct wg_peer *peer) lockdep_assert_held(&peer->device->device_update_lock); peer_make_dead(peer); - synchronize_rcu(); + synchronize_net(); peer_remove_after_dead(peer); } @@ -178,7 +179,7 @@ void wg_peer_remove_all(struct wg_device *wg) peer_make_dead(peer); list_add_tail(&peer->peer_list, &dead_peers); } - synchronize_rcu(); + synchronize_net(); list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) peer_remove_after_dead(peer); } @@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head *rcu) /* The final zeroing takes care of clearing any remaining handshake key * material and other potentially sensitive information. */ - kfree_sensitive(peer); + memzero_explicit(peer, sizeof(*peer)); + kmem_cache_free(peer_cache, peer); } static void kref_release(struct kref *refcount) @@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer) return; kref_put(&peer->refcount, kref_release); } + +int __init wg_peer_init(void) +{ + peer_cache = KMEM_CACHE(wg_peer, 0); + return peer_cache ? 0 : -ENOMEM; +} + +void wg_peer_uninit(void) +{ + kmem_cache_destroy(peer_cache); +} diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h index 8d53b687a1d1..76e4d3128ad4 100644 --- a/drivers/net/wireguard/peer.h +++ b/drivers/net/wireguard/peer.h @@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer); void wg_peer_remove(struct wg_peer *peer); void wg_peer_remove_all(struct wg_device *wg); +int wg_peer_init(void); +void wg_peer_uninit(void); + #endif /* _WG_PEER_H */ diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c index 846db14cb046..e173204ae7d7 100644 --- a/drivers/net/wireguard/selftest/allowedips.c +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -19,32 +19,22 @@ #include <linux/siphash.h> -static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, - u8 cidr) -{ - swap_endian(dst, src, bits); - memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); - if (cidr) - dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); -} - static __init void print_node(struct allowedips_node *node, u8 bits) { char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; - char *fmt_declaration = KERN_DEBUG - "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + u8 ip1[16], ip2[16], cidr1, cidr2; char *style = "dotted"; - u8 ip1[16], ip2[16]; u32 color = 0; + if (node == NULL) + return; if (bits == 32) { fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; - fmt_declaration = KERN_DEBUG - "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; + fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; } else if (bits == 128) { fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; - fmt_declaration = KERN_DEBUG - "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; + fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; } if (node->peer) { hsiphash_key_t key = { { 0 } }; @@ -55,24 +45,20 @@ static __init void print_node(struct allowedips_node *node, u8 bits) hsiphash_1u32(0xabad1dea, &key) % 200; style = "bold"; } - swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); - printk(fmt_declaration, ip1, node->cidr, style, color); + wg_allowedips_read_node(node, ip1, &cidr1); + printk(fmt_declaration, ip1, cidr1, style, color); if (node->bit[0]) { - swap_endian_and_apply_cidr(ip2, - rcu_dereference_raw(node->bit[0])->bits, bits, - node->cidr); - printk(fmt_connection, ip1, node->cidr, ip2, - rcu_dereference_raw(node->bit[0])->cidr); - print_node(rcu_dereference_raw(node->bit[0]), bits); + wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2); + printk(fmt_connection, ip1, cidr1, ip2, cidr2); } if (node->bit[1]) { - swap_endian_and_apply_cidr(ip2, - rcu_dereference_raw(node->bit[1])->bits, - bits, node->cidr); - printk(fmt_connection, ip1, node->cidr, ip2, - rcu_dereference_raw(node->bit[1])->cidr); - print_node(rcu_dereference_raw(node->bit[1]), bits); + wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2); + printk(fmt_connection, ip1, cidr1, ip2, cidr2); } + if (node->bit[0]) + print_node(rcu_dereference_raw(node->bit[0]), bits); + if (node->bit[1]) + print_node(rcu_dereference_raw(node->bit[1]), bits); } static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) @@ -121,8 +107,8 @@ static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) { union nf_inet_addr mask; - memset(&mask, 0x00, 128 / 8); - memset(&mask, 0xff, cidr / 8); + memset(&mask, 0, sizeof(mask)); + memset(&mask.all, 0xff, cidr / 8); if (cidr % 32) mask.all[cidr / 32] = (__force u32)htonl( (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); @@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allowedips_node *node) } static __init inline bool -horrible_match_v4(const struct horrible_allowedips_node *node, - struct in_addr *ip) +horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip) { return (ip->s_addr & node->mask.ip) == node->ip.ip; } static __init inline bool -horrible_match_v6(const struct horrible_allowedips_node *node, - struct in6_addr *ip) +horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip) { - return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == - node->ip.ip6[0] && - (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == - node->ip.ip6[1] && - (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == - node->ip.ip6[2] && + return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] && + (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] && + (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] && (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; } static __init void -horrible_insert_ordered(struct horrible_allowedips *table, - struct horrible_allowedips_node *node) +horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node) { struct horrible_allowedips_node *other = NULL, *where = NULL; u8 my_cidr = horrible_mask_to_cidr(node->mask); hlist_for_each_entry(other, &table->head, table) { - if (!memcmp(&other->mask, &node->mask, - sizeof(union nf_inet_addr)) && - !memcmp(&other->ip, &node->ip, - sizeof(union nf_inet_addr)) && - other->ip_version == node->ip_version) { + if (other->ip_version == node->ip_version && + !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) && + !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) { other->value = node->value; kfree(node); return; } + } + hlist_for_each_entry(other, &table->head, table) { where = other; if (horrible_mask_to_cidr(other->mask) <= my_cidr) break; @@ -201,8 +181,7 @@ static __init int horrible_allowedips_insert_v4(struct horrible_allowedips *table, struct in_addr *ip, u8 cidr, void *value) { - struct horrible_allowedips_node *node = kzalloc(sizeof(*node), - GFP_KERNEL); + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (unlikely(!node)) return -ENOMEM; @@ -219,8 +198,7 @@ static __init int horrible_allowedips_insert_v6(struct horrible_allowedips *table, struct in6_addr *ip, u8 cidr, void *value) { - struct horrible_allowedips_node *node = kzalloc(sizeof(*node), - GFP_KERNEL); + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (unlikely(!node)) return -ENOMEM; @@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct horrible_allowedips *table, } static __init void * -horrible_allowedips_lookup_v4(struct horrible_allowedips *table, - struct in_addr *ip) +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip) { struct horrible_allowedips_node *node; - void *ret = NULL; hlist_for_each_entry(node, &table->head, table) { - if (node->ip_version != 4) - continue; - if (horrible_match_v4(node, ip)) { - ret = node->value; - break; - } + if (node->ip_version == 4 && horrible_match_v4(node, ip)) + return node->value; } - return ret; + return NULL; } static __init void * -horrible_allowedips_lookup_v6(struct horrible_allowedips *table, - struct in6_addr *ip) +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip) { struct horrible_allowedips_node *node; - void *ret = NULL; hlist_for_each_entry(node, &table->head, table) { - if (node->ip_version != 6) + if (node->ip_version == 6 && horrible_match_v6(node, ip)) + return node->value; + } + return NULL; +} + + +static __init void +horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + if (node->value != value) continue; - if (horrible_match_v6(node, ip)) { - ret = node->value; - break; - } + hlist_del(&node->table); + kfree(node); } - return ret; + } static __init bool randomized_test(void) @@ -296,6 +278,7 @@ static __init bool randomized_test(void) goto free; } kref_init(&peers[i]->refcount); + INIT_LIST_HEAD(&peers[i]->allowedips_list); } mutex_lock(&mutex); @@ -333,7 +316,7 @@ static __init bool randomized_test(void) if (wg_allowedips_insert_v4(&t, (struct in_addr *)mutated, cidr, peer, &mutex) < 0) { - pr_err("allowedips random malloc: FAIL\n"); + pr_err("allowedips random self-test malloc: FAIL\n"); goto free_locked; } if (horrible_allowedips_insert_v4(&h, @@ -396,23 +379,33 @@ static __init bool randomized_test(void) print_tree(t.root6, 128); } - for (i = 0; i < NUM_QUERIES; ++i) { - prandom_bytes(ip, 4); - if (lookup(t.root4, 32, ip) != - horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { - pr_err("allowedips random self-test: FAIL\n"); - goto free; + for (j = 0;; ++j) { + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 4); + if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip); + pr_err("allowedips random v4 self-test: FAIL\n"); + goto free; + } + prandom_bytes(ip, 16); + if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { + pr_err("allowedips random v6 self-test: FAIL\n"); + goto free; + } } + if (j >= NUM_PEERS) + break; + mutex_lock(&mutex); + wg_allowedips_remove_by_peer(&t, peers[j], &mutex); + mutex_unlock(&mutex); + horrible_allowedips_remove_by_value(&h, peers[j]); } - for (i = 0; i < NUM_QUERIES; ++i) { - prandom_bytes(ip, 16); - if (lookup(t.root6, 128, ip) != - horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { - pr_err("allowedips random self-test: FAIL\n"); - goto free; - } + if (t.root4 || t.root6) { + pr_err("allowedips random self-test removal: FAIL\n"); + goto free; } + ret = true; free: diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index d9ad850daa79..8c496b747108 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4, if (new4) wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); - synchronize_rcu(); + synchronize_net(); sock_free(old4); sock_free(old6); } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 51ce767eaf88..7a6fd46d0c6e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) static void mac80211_hwsim_stop(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; + data->started = false; hrtimer_cancel(&data->beacon_timer); + + while (!skb_queue_empty(&data->pending)) + ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); + wiphy_dbg(hw->wiphy, "%s\n", __func__); } diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 03a246e60fd9..21c4c34c52d8 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue, spin_unlock_irqrestore(&queue->lock, flags); } -s32 scaled_ppm_to_ppb(long ppm) +long scaled_ppm_to_ppb(long ppm) { /* * The 'freq' field in the 'struct timex' is in parts per @@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm) s64 ppb = 1 + ppm; ppb *= 125; ppb >>= 13; - return (s32) ppb; + return (long) ppb; } EXPORT_SYMBOL(scaled_ppm_to_ppb); @@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) delta = ktime_to_ns(kt); err = ops->adjtime(ops, delta); } else if (tx->modes & ADJ_FREQUENCY) { - s32 ppb = scaled_ppm_to_ppb(tx->freq); + long ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; if (ops->adjfine) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index a6d731e959a2..36a1319ec4bf 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -2284,7 +2284,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str mon_wdev->iftype = NL80211_IFTYPE_MONITOR; mon_ndev->ieee80211_ptr = mon_wdev; - ret = register_netdevice(mon_ndev); + ret = cfg80211_register_netdevice(mon_ndev); if (ret) { goto out; } @@ -2360,7 +2360,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy, adapter = rtw_netdev_priv(ndev); pwdev_priv = adapter_wdev_data(adapter); - unregister_netdevice(ndev); + cfg80211_unregister_netdevice(ndev); if (ndev == pwdev_priv->pmon_ndev) { pwdev_priv->pmon_ndev = NULL; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 565deea6ffe8..8612f8fc86c1 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -830,6 +830,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); struct virtchnl_proto_hdrs { u8 tunnel_level; + u8 pad[3]; /** * specify where protocol header start from. * 0 - from the outer layer diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 028f442530cf..60ffeb6b67ae 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, struct mlx5_hairpin_params *params); void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair); +void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp); #endif /* __TRANSOBJ_H__ */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d47fd33b228..51d7f1b8b32a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp); * @ppm: Parts per million, but with a 16 bit binary fractional field */ -extern s32 scaled_ppm_to_ppb(long ppm); +extern long scaled_ppm_to_ppb(long ppm); /** * ptp_find_pin() - obtain the pin index of a given auxiliary function diff --git a/include/linux/socket.h b/include/linux/socket.h index b8fc5c53ba6f..0d8e3dcb7f88 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); - -extern struct ns_common *get_net_ns(struct ns_common *ns); #endif /* _LINUX_SOCKET_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 445b66c6eb7e..e89530d0d9c6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5537,7 +5537,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. - * This version can only be used while holding the RTNL. + * This version can only be used while holding the wiphy mutex. * * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -6392,7 +6392,12 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, /** * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header - * of injected frames + * of injected frames. + * + * To accurately parse and take into account rate and retransmission fields, + * you must initialize the chandef field in the ieee80211_tx_info structure + * of the skb before calling this function. + * * @skb: packet injected by userspace * @dev: the &struct device of this 802.11 device */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fa5887143f0d..bdc0459a595e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -184,6 +184,9 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); void net_ns_barrier(void); + +struct ns_common *get_net_ns(struct ns_common *ns); +struct net *get_net_ns_by_fd(int fd); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@ -203,13 +206,22 @@ static inline void net_ns_get_ownership(const struct net *net, } static inline void net_ns_barrier(void) {} + +static inline struct ns_common *get_net_ns(struct ns_common *ns) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); diff --git a/include/net/sock.h b/include/net/sock.h index 0e962d8bc73b..7a7058f4f265 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1934,7 +1934,8 @@ static inline u32 net_tx_rndhash(void) static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); } static inline bool sk_rethink_txhash(struct sock *sk) @@ -2206,9 +2207,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock, static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } } @@ -2266,8 +2270,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk); static inline int sock_error(struct sock *sk) { int err; - if (likely(!sk->sk_err)) + + /* Avoid an atomic operation for the common case. + * This is racy since another cpu/thread can change sk_err under us. + */ + if (likely(data_race(!sk->sk_err))) return 0; + err = xchg(&sk->sk_err, 0); return -err; } diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index f0c35ce8628c..4fe842c3a3a9 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -54,7 +54,7 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_BT 28 /* virtio bluetooth */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index be18af481d7d..c7236daa2415 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, if (a && a->status & ATIF_PROBE) { a->status |= ATIF_PROBE_FAIL; /* - * we do not respond to probe or request packets for + * we do not respond to probe or request packets of * this address while we are probing this address */ goto unlock; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 789f257be24f..fc8be49010b9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) if (WARN_ON(!forw_packet->if_outgoing)) return; - if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) + if (forw_packet->if_outgoing->soft_iface != soft_iface) { + pr_warn("%s: soft interface switch for queued OGM\n", __func__); return; + } if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) return; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fd12f1652bdf..7d71d104fdfd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1610,8 +1610,13 @@ setup_failed: } else { /* Init failed, cleanup */ flush_work(&hdev->tx_work); - flush_work(&hdev->cmd_work); + + /* Since hci_rx_work() is possible to awake new cmd_work + * it should be flushed first to avoid unexpected call of + * hci_cmd_work() + */ flush_work(&hdev->rx_work); + flush_work(&hdev->cmd_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 251b9128f530..eed0dd066e12 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -762,7 +762,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - bh_lock_sock_nested(sk); + lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; @@ -771,7 +771,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) hci_dev_put(hdev); } - bh_unlock_sock(sk); + release_sock(sk); } read_unlock(&hci_sk_list.lock); } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 372e3b25aaa4..7dd51da73845 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) { struct l2cap_chan *chan; - bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); + BT_DBG("pchan %p", pchan); chan = l2cap_chan_create(); if (!chan) @@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) */ atomic_set(&chan->nesting, L2CAP_NESTING_SMP); - bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); + BT_DBG("created chan %p", chan); return chan; } @@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan) { struct smp_dev *smp; - bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); + BT_DBG("chan %p", chan); smp = chan->data; if (smp) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7ce8a77cc6b6..e013d33f1c7c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -90,8 +90,8 @@ struct bridge_mcast_stats { #endif struct br_tunnel_info { - __be64 tunnel_id; - struct metadata_dst *tunnel_dst; + __be64 tunnel_id; + struct metadata_dst __rcu *tunnel_dst; }; /* private vlan flags */ diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 0d3a8c01552e..01017448ebde 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, br_vlan_tunnel_rht_params); } +static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan) +{ + struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst); + + WRITE_ONCE(vlan->tinfo.tunnel_id, 0); + RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL); + dst_release(&tdst->dst); +} + void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan) { - if (!vlan->tinfo.tunnel_dst) + if (!rcu_access_pointer(vlan->tinfo.tunnel_dst)) return; rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, br_vlan_tunnel_rht_params); - vlan->tinfo.tunnel_id = 0; - dst_release(&vlan->tinfo.tunnel_dst->dst); - vlan->tinfo.tunnel_dst = NULL; + vlan_tunnel_info_release(vlan); } static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan, u32 tun_id) { - struct metadata_dst *metadata = NULL; + struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); int err; - if (vlan->tinfo.tunnel_dst) + if (metadata) return -EEXIST; metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, @@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, return -EINVAL; metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; - vlan->tinfo.tunnel_dst = metadata; - vlan->tinfo.tunnel_id = key; + rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata); + WRITE_ONCE(vlan->tinfo.tunnel_id, key); err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, br_vlan_tunnel_rht_params); @@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, return 0; out: - dst_release(&vlan->tinfo.tunnel_dst->dst); - vlan->tinfo.tunnel_dst = NULL; - vlan->tinfo.tunnel_id = 0; + vlan_tunnel_info_release(vlan); return err; } @@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan) { + struct metadata_dst *tunnel_dst; + __be64 tunnel_id; int err; - if (!vlan || !vlan->tinfo.tunnel_id) + if (!vlan) return 0; - if (unlikely(!skb_vlan_tag_present(skb))) + tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id); + if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb))) return 0; skb_dst_drop(skb); @@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, if (err) return err; - skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); + tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); + if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) + skb_dst_set(skb, &tunnel_dst->dst); return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 98f20efbfadf..bf774575ad71 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 43b6ac4c4439..9b5a767eddd5 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -641,6 +641,18 @@ void __put_net(struct net *net) } EXPORT_SYMBOL_GPL(__put_net); +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ +struct ns_common *get_net_ns(struct ns_common *ns) +{ + return &get_net(container_of(ns, struct net, ns))->ns; +} +EXPORT_SYMBOL_GPL(get_net_ns); + struct net *get_net_ns_by_fd(int fd) { struct file *file; @@ -660,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd) fput(file); return net; } - -#else -struct net *get_net_ns_by_fd(int fd) -{ - return ERR_PTR(-EINVAL); -} -#endif EXPORT_SYMBOL_GPL(get_net_ns_by_fd); +#endif struct net *get_net_ns_by_pid(pid_t pid) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3e84279c4123..ec931b080156 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4842,10 +4842,12 @@ static int rtnl_bridge_notify(struct net_device *dev) if (err < 0) goto errout; - if (!skb->len) { - err = -EINVAL; + /* Notification info is only filled for bridge ports, not the bridge + * device itself. Therefore, a zero notification length is valid and + * should not result in an error. + */ + if (!skb->len) goto errout; - } rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3ad22870298c..bbc3b4b62032 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1253,6 +1253,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; + bool is_zerocopy; u32 lo, hi; u16 len; @@ -1267,6 +1268,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; + is_zerocopy = uarg->zerocopy; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); @@ -1274,7 +1276,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; - if (!uarg->zerocopy) + if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 2a6733a6449a..5d38e90895ac 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev, if (dev->sfp_bus) return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); - if (ops->get_module_info) + if (ops->get_module_eeprom_by_page) return ops->get_module_eeprom_by_page(dev, page_data, extack); return -EOPNOTSUPP; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 3fa7a394eabf..baa5d10043cb 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, if (eeprom.offset + eeprom.len > total_len) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) return -EFAULT; test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); + data = kcalloc(test_len, sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; ret = ops->get_tunable(dev, &tuna, data); @@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_phy_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; if (phy_drv_tunable) { diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index b3029fff715d..2d51b7ab4dc5 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; + len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */ + for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f17870ee558b..2f94d221c00e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, return err; } - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk) sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bfaf327e9d12..e0480c6cebaa 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); break; } kfree(doi_def); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2e35f68da40a..1c6429c353a9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1c9f71a37258..95a718397fd1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb) struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); + bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2) - ping_queue_rcv_skb(sk, skb2); + if (skb2 && !ping_queue_rcv_skb(sk, skb2)) + rc = true; sock_put(sk); - return true; } - pr_debug("no socket, dropping\n"); - return false; + if (!rc) + pr_debug("no socket, dropping\n"); + + return rc; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6787c55f6ab..6a36ac98476f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2056,6 +2056,19 @@ martian_source: return err; } +/* get device for dst_alloc with local routes */ +static struct net_device *ip_rt_get_dev(struct net *net, + const struct fib_result *res) +{ + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; + struct net_device *dev = NULL; + + if (nhc) + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); + + return dev ? : net->loopback_dev; +} + /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2212,7 +2225,7 @@ local_input: } } - rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15f5504adf5b..1307ad0d3b9e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); if (static_branch_unlikely(&udp_encap_needed_key)) { @@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err) { lock_sock(sk); + /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing + * with close() + */ + if (sock_flag(sk, SOCK_DEAD)) + goto out; + sk->sk_err = err; sk->sk_error_report(sk); __udp_disconnect(sk, 0); +out: release_sock(sk); return 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b0ef65eb9bd2..701eb82acd1c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index e204163c7036..92f3235fa287 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); +static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) +{ + if (likely(next != IPPROTO_ICMPV6)) + return false; + + if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) + return false; + + return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; +} + void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, nft_in(pkt)); - return; + if (nft_hook(pkt) == NF_INET_PRE_ROUTING || + nft_hook(pkt) == NF_INET_INGRESS) { + if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || + nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } } *dest = 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 199b080d418a..3fcd86f4dfdc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 1c572c8daced..6201965bd822 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1066,11 +1066,6 @@ out_error: goto partial_message; } - if (skb_has_frag_list(head)) { - kfree_skb_list(skb_shinfo(head)->frag_list); - skb_shinfo(head)->frag_list = NULL; - } - if (head != kcm->seq_skb) kfree_skb(head); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 9245c0421bda..fc34ae2b604c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2019, 2021 Intel Corporation */ #include <linux/debugfs.h> @@ -387,10 +387,17 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; + int ret; rtnl_lock(); + wiphy_lock(local->hw.wiphy); __ieee80211_suspend(&local->hw, NULL); - __ieee80211_resume(&local->hw); + ret = __ieee80211_resume(&local->hw); + wiphy_unlock(local->hw.wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + rtnl_unlock(); return count; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 214404a558fb..648696b49f89 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON_ONCE(!chanctx_conf)) { + if (!chanctx_conf) { rcu_read_unlock(); return NULL; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2e2f73a4aa73..137fa4c50e07 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do GFP_KERNEL); } - /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan, *tmpsdata; - - /* down all dependent devices, that is VLANs */ - list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, - u.vlan.list) - dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { /* remove all packets in parent bc_buf pointing to this dev */ @@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* close all dependent VLAN interfaces before locking wiphy */ + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmpsdata; + + list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + } + wiphy_lock(sdata->local->hw.wiphy); ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -1591,6 +1593,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + if (!list_empty(&sdata->u.ap.vlans)) + return -EBUSY; + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_OCB: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 62145e5f9628..f33a3acd7f96 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -252,6 +252,7 @@ static void ieee80211_restart_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, restart_work); struct ieee80211_sub_if_data *sdata; + int ret; /* wait for scan work complete */ flush_workqueue(local->workqueue); @@ -301,8 +302,12 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for all packet processing to be done */ synchronize_net(); - ieee80211_reconfig(local); + ret = ieee80211_reconfig(local); wiphy_unlock(local->hw.wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + rtnl_unlock(); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1bb43edd47b6..af0ef456eb0f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2240,17 +2240,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (is_multicast_ether_addr(hdr->addr1)) { - I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); - goto out_no_led; - } - if (rx->sta) cache = &rx->sta->frags; if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out; + if (is_multicast_ether_addr(hdr->addr1)) + return RX_DROP_MONITOR; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -2376,7 +2374,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); - out_no_led: if (rx->sta) rx->sta->rx_stats.packets++; return RX_CONTINUE; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index d4cc9ac2d703..6b50cb5e0e3c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + size_t min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); + + if (!ieee80211_is_probe_resp(mgmt->frame_control) && + !ieee80211_is_beacon(mgmt->frame_control) && + !ieee80211_is_s1g_beacon(mgmt->frame_control)) + return; if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - if (skb->len < 15) - return; - } else if (skb->len < 24 || - (!ieee80211_is_probe_resp(mgmt->frame_control) && - !ieee80211_is_beacon(mgmt->frame_control))) + if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon); + } + + if (skb->len < min_hdr_len) return; sdata1 = rcu_dereference(local->scan_sdata); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0b719f3d2dec..2651498d05e8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2014,6 +2014,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, ieee80211_tx(sdata, sta, skb, false); } +static bool ieee80211_validate_radiotap_len(struct sk_buff *skb) +{ + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *)skb->data; + + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + return false; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? */ + if (unlikely(rthdr->it_version)) + return false; /* only version 0 is supported */ + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) + return false; /* skb too short for claimed rt header extent */ + + return true; +} + bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, struct net_device *dev) { @@ -2022,8 +2042,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_supported_band *sband = - local->hw.wiphy->bands[info->band]; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, NULL); u16 txflags; @@ -2036,17 +2054,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, u8 vht_mcs = 0, vht_nss = 0; int i; - /* check for not even having the fixed radiotap header part */ - if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) - return false; /* too short to be possibly valid */ - - /* is it a header version we can trust to find length from? */ - if (unlikely(rthdr->it_version)) - return false; /* only version 0 is supported */ - - /* does the skb contain enough to deliver on the alleged length? */ - if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) - return false; /* skb too short for claimed rt header extent */ + if (!ieee80211_validate_radiotap_len(skb)) + return false; info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_DONTFRAG; @@ -2186,6 +2195,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, return false; if (rate_found) { + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[info->band]; + info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { @@ -2199,7 +2211,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(info->control.rates, vht_mcs, vht_nss); - } else { + } else if (sband) { for (i = 0; i < sband->n_bitrates; i++) { if (rate * 5 != sband->bitrates[i].bitrate) continue; @@ -2236,8 +2248,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_CTL_INJECTED; - /* Sanity-check and process the injection radiotap header */ - if (!ieee80211_parse_tx_radiotap(skb, dev)) + /* Sanity-check the length of the radiotap header */ + if (!ieee80211_validate_radiotap_len(skb)) goto fail; /* we now know there is a radiotap header with a length we can use */ @@ -2351,6 +2363,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, ieee80211_select_queue_80211(sdata, skb, hdr); skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority)); + /* + * Process the radiotap header. This will now take into account the + * selected chandef above to accurately set injection rates and + * retransmissions. + */ + if (!ieee80211_parse_tx_radiotap(skb, dev)) + goto fail_rcu; + /* remove the injection radiotap header */ skb_pull(skb, len_rthdr); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0a0481f5af48..93d96a4f9c3e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2178,8 +2178,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) list_for_each_entry(ctx, &local->chanctx_list, list) ctx->driver_present = false; mutex_unlock(&local->chanctx_mtx); - - cfg80211_shutdown_all_interfaces(local->hw.wiphy); } static void ieee80211_assign_chanctx(struct ieee80211_local *local, diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 6b825fb3fa83..9b263f27ce9b 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -356,6 +356,8 @@ void mptcp_get_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5edc686faff1..632350018fb6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, /* try to fetch required memory from subflow */ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - if (ssk->sk_forward_alloc < skb->truesize) - goto drop; - __sk_mem_reclaim(ssk, skb->truesize); - if (!sk_rmem_schedule(sk, skb, skb->truesize)) + int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; + + if (ssk->sk_forward_alloc < amount) goto drop; + + ssk->sk_forward_alloc -= amount; + sk->sk_forward_alloc += amount; } /* the skb map_seq accounts for the skb offset: @@ -668,18 +670,22 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ -static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) +static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0; if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - mptcp_data_lock(sk); + return false; __mptcp_move_skbs_from_subflow(msk, ssk, &moved); __mptcp_ofo_queue(msk); + if (unlikely(ssk->sk_err)) { + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &msk->flags); + } /* If the moves have caught up with the DATA_FIN sequence number * it's time to ack the DATA_FIN and change socket state, but @@ -688,7 +694,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - mptcp_data_unlock(sk); + return moved > 0; } void mptcp_data_ready(struct sock *sk, struct sock *ssk) @@ -696,7 +702,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); int sk_rbuf, ssk_rbuf; - bool wake; /* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@ -705,28 +710,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (unlikely(subflow->disposable)) return; - /* move_skbs_to_msk below can legitly clear the data_avail flag, - * but we will need later to properly woke the reader, cache its - * value - */ - wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; - if (wake) - set_bit(MPTCP_DATA_READY, &msk->flags); - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); sk_rbuf = READ_ONCE(sk->sk_rcvbuf); if (unlikely(ssk_rbuf > sk_rbuf)) sk_rbuf = ssk_rbuf; - /* over limit? can't append more skbs to msk */ + /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) - goto wake; - - move_skbs_to_msk(msk, ssk); + return; -wake: - if (wake) + /* Wake-up the reader only for in-sequence data */ + mptcp_data_lock(sk); + if (move_skbs_to_msk(msk, ssk)) { + set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); + } + mptcp_data_unlock(sk); } static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) @@ -858,7 +857,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) sock_owned_by_me(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->data_avail) + if (READ_ONCE(subflow->data_avail)) return mptcp_subflow_tcp_sock(subflow); } @@ -1955,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); tcp_cleanup_rbuf(ssk, moved); + + if (unlikely(ssk->sk_err)) + __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); } while (!done); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0c6f99c67345..385796f0ef19 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk) enum mptcp_data_avail { MPTCP_SUBFLOW_NODATA, MPTCP_SUBFLOW_DATA_AVAIL, - MPTCP_SUBFLOW_OOO_DATA }; struct mptcp_delegated_action { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ef3d037f984a..be1de4084196 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); } -static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) +static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", - ssn, subflow->map_subflow_seq, subflow->map_data_len); + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + ssn, subflow->map_subflow_seq, subflow->map_data_len); } static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) @@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) /* Mapping covers data later in the subflow stream, * currently unsupported. */ - warn_bad_map(subflow, ssn); + dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping does covers past subflow data, invalid */ - warn_bad_map(subflow, ssn + skb->len); + dbg_bad_map(subflow, ssn); return false; } return true; @@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); if (subflow->data_avail) return true; @@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk) ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); - if (ack_seq == old_ack) { - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - break; - } else if (after64(ack_seq, old_ack)) { - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; - break; + if (unlikely(before64(ack_seq, old_ack))) { + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + continue; } - /* only accept in-sequence mapping. Old values are spurious - * retransmission - */ - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + break; } return true; @@ -1065,12 +1060,11 @@ fallback: * subflow_error_report() will introduce the appropriate barriers */ ssk->sk_err = EBADMSG; - ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); return false; } @@ -1080,7 +1074,7 @@ fallback: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; } @@ -1092,7 +1086,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1120,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = tcp_full_space(sk); } -static void subflow_data_ready(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - u16 state = 1 << inet_sk_state_load(sk); - struct sock *parent = subflow->conn; - struct mptcp_sock *msk; - - msk = mptcp_sk(parent); - if (state & TCPF_LISTEN) { - /* MPJ subflow are removed from accept queue before reaching here, - * avoid stray wakeups - */ - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) - return; - - set_bit(MPTCP_DATA_READY, &msk->flags); - parent->sk_data_ready(parent); - return; - } - - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join && !(state & TCPF_CLOSE)); - - if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent, sk); -} - -static void subflow_write_space(struct sock *ssk) -{ - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; - - mptcp_propagate_sndbuf(sk, ssk); - mptcp_write_space(sk); -} - void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1195,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk) mptcp_data_unlock(sk); } +static void subflow_data_ready(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); + struct sock *parent = subflow->conn; + struct mptcp_sock *msk; + + msk = mptcp_sk(parent); + if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + + set_bit(MPTCP_DATA_READY, &msk->flags); + parent->sk_data_ready(parent); + return; + } + + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join && !(state & TCPF_CLOSE)); + + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); +} + +static void subflow_write_space(struct sock *ssk) +{ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); +} + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@ -1505,6 +1501,8 @@ static void subflow_state_change(struct sock *sk) */ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b100c04a0e43..3d6d49420db8 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, int length = (th->doff * 4) - sizeof(*th); u8 buf[40], *ptr; + if (unlikely(length < 0)) + return false; + ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); if (ptr == NULL) return false; @@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, length--; continue; default: + if (length < 2) + return true; opsize = *ptr++; if (opsize < 2) return true; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 72bc759179ef..bf4d6ec9fc55 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4364,13 +4364,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_set_alloc_name(&ctx, set, name); kfree(name); if (err < 0) - goto err_set_alloc_name; + goto err_set_name; + + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + + INIT_LIST_HEAD(&set->bindings); + INIT_LIST_HEAD(&set->catchall_list); + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; + set->ktype = ktype; + set->klen = desc.klen; + set->dtype = dtype; + set->objtype = objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; + set->policy = policy; + set->udlen = udlen; + set->udata = udata; + set->timeout = timeout; + set->gc_int = gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) + set->field_len[i] = desc.field_len[i]; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err_set_init; if (nla[NFTA_SET_EXPR]) { expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_alloc_name; + goto err_set_expr_alloc; } set->exprs[0] = expr; set->num_exprs++; @@ -4381,75 +4413,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_EXPR)) { err = -EINVAL; - goto err_set_alloc_name; + goto err_set_expr_alloc; } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; - goto err_set_init; + goto err_set_expr_alloc; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; - goto err_set_init; + goto err_set_expr_alloc; } expr = nft_set_elem_expr_alloc(&ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_init; + goto err_set_expr_alloc; } set->exprs[i++] = expr; set->num_exprs++; } } - udata = NULL; - if (udlen) { - udata = set->data + size; - nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); - } - - INIT_LIST_HEAD(&set->bindings); - INIT_LIST_HEAD(&set->catchall_list); - set->table = table; - write_pnet(&set->net, net); - set->ops = ops; - set->ktype = ktype; - set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; - set->dlen = desc.dlen; - set->flags = flags; - set->size = desc.size; - set->policy = policy; - set->udlen = udlen; - set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; set->handle = nf_tables_alloc_handle(table); - set->field_count = desc.field_count; - for (i = 0; i < desc.field_count; i++) - set->field_len[i] = desc.field_len[i]; - - err = ops->init(set, &desc, nla); - if (err < 0) - goto err_set_init; - err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err_set_trans; + goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; -err_set_trans: - ops->destroy(set); -err_set_init: +err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); -err_set_alloc_name: + + ops->destroy(set); +err_set_init: kfree(set->name); err_set_name: kvfree(set); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ae906eb4b269..74e6e45a8e84 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. + */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); } /* diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c0477bec09bd..f2efaa4225f9 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) struct qrtr_sock *ipc; struct sk_buff *skb; struct qrtr_cb *cb; - unsigned int size; + size_t size; unsigned int ver; size_t hdrlen; diff --git a/net/rds/recv.c b/net/rds/recv.c index 4db109fb6ec2..5b426dc3634d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (rds_cmsg_recv(inc, msg, rs)) { ret = -EFAULT; - goto out; + break; } rds_recvmsg_zcookie(rs, msg); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 18edd9ad1410..a656baa321fe 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb, } err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); - if (err == NF_ACCEPT && - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ct_nat_execute(skb, ct, ctinfo, range, + maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = ct_nat_execute(skb, ct, ctinfo, NULL, + NF_NAT_MANIP_SRC); + } } return err; #else diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 7d37638ee1c7..951542843cab 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb, } tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); - if (!tcph) + if (!tcph || tcph->doff < 5) return NULL; return skb_header_pointer(skb, offset, @@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph, length--; continue; } + if (length < 2) + break; opsize = *ptr++; if (opsize < 2 || opsize > length) break; @@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph, length--; continue; } + if (length < 2) + break; opsize = *ptr++; if (opsize < 2 || opsize > length) break; @@ -2338,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch) /* List of known Diffserv codepoints: * - * Least Effort (CS1) + * Least Effort (CS1, LE) * Best Effort (CS0) * Max Reliability & LLT "Lo" (TOS1) * Max Throughput (TOS2) @@ -2360,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Total 25 codepoints. */ -/* List of traffic classes in RFC 4594: +/* List of traffic classes in RFC 4594, updated by RFC 8622: * (roughly descending order of contended priority) * (roughly ascending order of uncontended throughput) * @@ -2375,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Ops, Admin, Management (CS2,TOS1) - eg. ssh * Standard Service (CS0 & unrecognised codepoints) * High Throughput Data (AF1x,TOS2) - eg. web traffic - * Low Priority Data (CS1) - eg. BitTorrent + * Low Priority Data (CS1,LE) - eg. BitTorrent * Total 12 traffic classes. */ @@ -2391,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch) * Video Streaming (AF4x, AF3x, CS3) * Bog Standard (CS0 etc.) * High Throughput (AF1x, TOS2) - * Background Traffic (CS1) + * Background Traffic (CS1, LE) * * Total 8 traffic classes. */ @@ -2435,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) * Best Effort (CS0, AF1x, TOS2, and those not specified) - * Background Traffic (CS1) + * Background Traffic (CS1, LE) * * Total 4 traffic classes. */ @@ -2473,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) static int cake_config_diffserv3(struct Qdisc *sch) { /* Simplified Diffserv structure with 3 tins. - * Low Priority (CS1) + * Low Priority (CS1, LE) * Best Effort * Latency Sensitive (TOS4, VA, EF, CS6, CS7) */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 081c11d5717c..8827987ba903 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1488,7 +1488,8 @@ static void htb_parent_to_leaf_offload(struct Qdisc *sch, struct Qdisc *old_q; /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ - qdisc_refcount_inc(new_q); + if (new_q) + qdisc_refcount_inc(new_q); old_q = htb_graft_helper(dev_queue, new_q); WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); } @@ -1675,10 +1676,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, cl->parent->common.classid, NULL); if (q->offload) { - if (new_q) { + if (new_q) htb_set_lockdep_class_child(new_q); - htb_parent_to_leaf_offload(sch, dev_queue, new_q); - } + htb_parent_to_leaf_offload(sch, dev_queue, new_q); } } diff --git a/net/socket.c b/net/socket.c index 27e3e7d53f8e..4f2c6d2795d0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ -/** - * get_net_ns - increment the refcount of the network namespace - * @ns: common namespace (net) - * - * Returns the net's common namespace. - */ - -struct ns_common *get_net_ns(struct ns_common *ns) -{ - return &get_net(container_of(ns, struct net, ns))->ns; -} -EXPORT_SYMBOL_GPL(get_net_ns); - static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; diff --git a/net/wireless/core.c b/net/wireless/core.c index 6fbf7537faf5..8d0883e81093 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1340,6 +1340,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, rdev->devlist_generation++; wdev->registered = true; + if (wdev->netdev && + sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj, + "phy80211")) + pr_err("failed to add phy80211 symlink to netdev!\n"); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } @@ -1365,14 +1370,6 @@ int cfg80211_register_netdevice(struct net_device *dev) if (ret) goto out; - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, - "phy80211")) { - pr_err("failed to add phy80211 symlink to netdev!\n"); - unregister_netdevice(dev); - ret = -EINVAL; - goto out; - } - cfg80211_register_wdev(rdev, wdev); ret = 0; out: diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9b959e3b09c6..0c3f05c9be27 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev) if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); wiphy_unlock(&rdev->wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(&rdev->wiphy); + rtnl_unlock(); return ret; diff --git a/net/wireless/util.c b/net/wireless/util.c index 7ec021a610ae..18dba3d7c638 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ break; + case NL80211_IFTYPE_OCB: + cfg80211_leave_ocb(rdev, dev); + break; default: break; } diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 76d9487fb03c..5abe92d55b69 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1384,12 +1384,37 @@ ipv4_rt_replace() ipv4_rt_replace_mpath } +# checks that cached input route on VRF port is deleted +# when VRF is deleted +ipv4_local_rt_cache() +{ + run_cmd "ip addr add 10.0.0.1/32 dev lo" + run_cmd "ip netns add test-ns" + run_cmd "ip link add veth-outside type veth peer name veth-inside" + run_cmd "ip link add vrf-100 type vrf table 1100" + run_cmd "ip link set veth-outside master vrf-100" + run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-outside up" + run_cmd "ip link set vrf-100 up" + run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" + run_cmd "ip netns exec test-ns ip link set veth-inside up" + run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip link delete vrf-100" + + # if we do not hang test is a success + log_test $? 0 "Cached route removed from VRF port device" +} + ipv4_route_test() { route_setup ipv4_rt_add ipv4_rt_replace + ipv4_local_rt_cache route_cleanup } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9ca5f1ba461e..2b495dc8d78e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 -# use TCP syn cookies, even if no flooding was detected. -ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 - set_ethtool_flags() { local ns="$1" local dev="$2" @@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do exit $ret fi + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 + fi + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3171069a6b46..cd6430b39982 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ +TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh new file mode 100755 index 000000000000..6caf6ac8c285 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + +dmesg | grep -q ' nft_rpfilter: ' +if [ $? -eq 0 ]; then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 +ip netns add ${ns1} +ip netns add ${ns2} + +load_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_ruleset_count() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop + } +} +EOF +} + +check_drops() { + dmesg | grep -q ' nft_rpfilter: ' + if [ $? -eq 0 ]; then + dmesg | grep ' nft_rpfilter: ' + echo "FAIL: rpfilter did drop packets" + return 1 + fi + + return 0 +} + +check_fib_counter() { + local want=$1 + local ns=$2 + local address=$3 + + line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" ) + ret=$? + + if [ $ret -ne 0 ];then + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 + ip netns exec ${ns} nft list table inet filter + return 1 + fi + + if [ $want -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset ${nsrouter} +load_ruleset ${ns1} +load_ruleset ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +sleep 3 + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not cause unwanted packet drops" + +ip netns exec ${nsrouter} nft flush table inet filter + +ip -net ${ns1} route del default +ip -net ${ns1} -6 route del default + +ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr del dead:1::99/64 dev eth0 + +ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr add dead:2::99/64 dev eth0 + +ip -net ${ns1} route add default via 10.0.2.1 +ip -net ${ns1} -6 route add default via dead:2::1 + +ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count ${nsrouter} + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 +check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 + +ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 + +sleep 2 +ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 + +exit 0 diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 7ed7cd95e58f..ebc4ee0fe179 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0 ip1 -4 route add default dev wg0 table 51820 ip1 -4 rule add not fwmark 51820 table 51820 ip1 -4 rule add table main suppress_prefixlength 0 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. n1 ping -W 1 -c 100 -f 192.168.99.7 n1 ping -W 1 -c 100 -f abab::1111 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 4eecb432a66c..74db83a0aedd 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y |
