summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/s390/driver-model.rst2
-rw-r--r--Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml3
-rw-r--r--Documentation/netlink/genetlink-c.yaml5
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml5
-rw-r--r--Documentation/netlink/genetlink.yaml5
-rw-r--r--Documentation/netlink/specs/netdev.yaml15
-rw-r--r--arch/s390/include/asm/irq.h1
-rw-r--r--arch/s390/kernel/irq.c1
-rw-r--r--drivers/net/bonding/bond_main.c3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c16
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h7
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c582
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h1
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c36
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_netdev.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c8
-rw-r--r--drivers/net/ethernet/realtek/Kconfig3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c1
-rw-r--r--drivers/net/phy/phy-core.c20
-rw-r--r--drivers/net/phy/realtek/Kconfig8
-rw-r--r--drivers/net/phy/realtek/realtek_main.c9
-rw-r--r--drivers/net/vxlan/vxlan_core.c32
-rw-r--r--drivers/s390/net/Kconfig11
-rw-r--r--drivers/s390/net/Makefile1
-rw-r--r--drivers/s390/net/lcs.c2385
-rw-r--r--drivers/s390/net/lcs.h342
-rw-r--r--include/linux/mlx4/device.h1
-rw-r--r--include/linux/mlx5/driver.h33
-rw-r--r--include/linux/mlx5/port.h3
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/rtnetlink.h1
-rw-r--r--include/net/netdev_rx_queue.h1
-rw-r--r--include/net/netmem.h21
-rw-r--r--include/net/page_pool/memory_provider.h45
-rw-r--r--include/net/page_pool/types.h4
-rw-r--r--include/uapi/linux/ethtool.h18
-rw-r--r--include/uapi/linux/netdev.h7
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/core/dev.c31
-rw-r--r--net/core/devmem.c93
-rw-r--r--net/core/devmem.h49
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/net-sysfs.c392
-rw-r--r--net/core/netdev-genl.c11
-rw-r--r--net/core/netdev_rx_queue.c69
-rw-r--r--net/core/page_pool.c51
-rw-r--r--net/core/page_pool_user.c7
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/ethtool/common.c42
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--tools/include/uapi/linux/netdev.h7
-rw-r--r--tools/net/ynl/Makefile.deps4
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py8
-rw-r--r--tools/testing/selftests/drivers/net/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_fragmented_msg.sh122
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh2
-rw-r--r--tools/testing/selftests/net/ynl.mk3
85 files changed, 1566 insertions, 3355 deletions
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst
index ad4bc2dbea43..ad18f129fb0b 100644
--- a/Documentation/arch/s390/driver-model.rst
+++ b/Documentation/arch/s390/driver-model.rst
@@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
--------------------
The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
-devices, like lcs or ctc.
+devices, like qeth or ctc.
The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
this attributes creates a ccwgroup device consisting of these ccw devices (if
diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
index 9bcbacb6640d..55d6a8379025 100644
--- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
+++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml
@@ -44,6 +44,9 @@ properties:
phy-mode:
enum:
- rgmii
+ - rgmii-id
+ - rgmii-rxid
+ - rgmii-txid
- rmii
phy-handle: true
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 9660ffb1ed6a..44f2226160ca 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 16380e12cabe..ed64acf1bef7 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index b036227b46f1..e43e50dba2e4 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -14,9 +14,10 @@ $defs:
pattern: ^[0-9A-Za-z_-]+( - 1)?$
minimum: 0
len-or-limit:
- # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc.
+ # literal int, const name, or limit based on fixed-width type
+ # e.g. u8-min, u16-max, etc.
type: [ string, integer ]
- pattern: ^[su](8|16|32|64)-(min|max)$
+ pattern: ^[0-9A-Za-z_-]+$
minimum: 0
# Schema for specs
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index cbb544bd6c84..288923e965ae 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -115,6 +115,9 @@ attribute-sets:
type: u64
enum: xsk-flags
-
+ name: io-uring-provider-info
+ attributes: []
+ -
name: page-pool
attributes:
-
@@ -171,6 +174,11 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf this page-pool is attached to.
type: u32
+ -
+ name: io-uring
+ doc: io-uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
-
name: page-pool-info
subset-of: page-pool
@@ -296,6 +304,11 @@ attribute-sets:
name: dmabuf
doc: ID of the dmabuf attached to this queue, if any.
type: u32
+ -
+ name: io-uring
+ doc: io_uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
-
name: qstats
@@ -572,6 +585,7 @@ operations:
- inflight-mem
- detach-time
- dmabuf
+ - io-uring
dump:
reply: *pp-reply
config-cond: page-pool
@@ -637,6 +651,7 @@ operations:
- napi-id
- ifindex
- dmabuf
+ - io-uring
dump:
request:
attributes:
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index d9e705f4a697..bde6a496df5f 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -54,7 +54,6 @@ enum interruption_class {
IRQIO_C70,
IRQIO_TAP,
IRQIO_VMR,
- IRQIO_LCS,
IRQIO_CTC,
IRQIO_ADM,
IRQIO_CSC,
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index ef7be599e1f7..7ca157ffab30 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
- {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e45bba240cbc..f6d0628a36d9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
struct bonding *bond;
struct slave *slave;
- if (!bond_dev)
- return NULL;
-
bond = netdev_priv(bond_dev);
if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
return NULL;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index f5901f8e3907..f6b990b7f5b4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -226,7 +226,6 @@ struct __packed offload_info {
struct offload_port_info ports;
struct offload_ka_info kas;
struct offload_rr_info rrs;
- u8 buf[];
};
struct __packed hw_atl_utils_fw_rpc {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 6b6cb73482d7..1753bb87dfbd 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout)
}
EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init);
-/* Get the octeon id assigned to the octeon device passed as argument.
- * This function is exported to other modules.
- * @param dev - octeon device pointer passed as a void *.
- * @return octeon device id
- */
-int lio_get_device_id(void *dev)
-{
- struct octeon_device *octeon_dev = (struct octeon_device *)dev;
- u32 i;
-
- for (i = 0; i < MAX_OCTEON_DEVICES; i++)
- if (octeon_device[i] == octeon_dev)
- return octeon_dev->octeon_id;
- return -1;
-}
-
void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
{
u64 instr_cnt;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index d26364c2ac81..19344b21f8fb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode,
*/
struct octeon_device *lio_get_device(u32 octeon_id);
-/** Get the octeon id assigned to the octeon device passed as argument.
- * This function is exported to other modules.
- * @param dev - octeon device pointer passed as a void *.
- * @return octeon device id
- */
-int lio_get_device_id(void *dev);
-
/** Read windowed register.
* @param oct - pointer to the Octeon device.
* @param addr - Address of the register to read.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index c7c2c15a1815..95e6f015a6af 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1211,9 +1211,6 @@ struct adapter {
struct timer_list flower_stats_timer;
struct work_struct flower_stats_work;
- /* Ethtool Dump */
- struct ethtool_dump eth_dump;
-
/* HMA */
struct hma_data hma;
@@ -1233,6 +1230,10 @@ struct adapter {
/* Ethtool n-tuple */
struct cxgb4_ethtool_filter *ethtool_filters;
+
+ /* Ethtool Dump */
+ /* Must be last - ends in a flex-array member. */
+ struct ethtool_dump eth_dump;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index b330020dc0d6..598df63518c5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -526,28 +526,6 @@ out:
return res;
}
-u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count)
-{
- struct mlx4_zone_entry *zone;
- int res = 0;
-
- spin_lock(&zones->lock);
-
- zone = __mlx4_find_zone_by_uid(zones, uid);
-
- if (NULL == zone) {
- res = -1;
- goto out;
- }
-
- __mlx4_free_from_zone(zone, obj, count);
-
-out:
- spin_unlock(&zones->lock);
-
- return res;
-}
-
u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count)
{
struct mlx4_zone_entry *zone;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index d7d856d1758a..b213094ea30f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
int align, u32 skip_mask, u32 *puid);
-/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
- * <zones>.
- */
-u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
- u32 uid, u32 obj, u32 count);
-
/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
* specifying the uid when freeing an object, zone allocator could figure it by
* itself. Other parameters are similar to mlx4_zone_free.
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 4e43f4a7d246..e3d0b13c1610 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
return err;
}
-int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
-{
- struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
- struct mlx4_mac_table *table = &info->mac_table;
- int i;
-
- for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
- if (!table->refs[i])
- continue;
-
- if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
- *idx = i;
- return 0;
- }
- }
-
- return -ENOENT;
-}
-EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
-
static bool mlx4_need_mf_bond(struct mlx4_dev *dev)
{
int i, num_eth_ports = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 5f6a0605e4ae..f62fbfb67a1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode {
MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X,
MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X,
MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X,
+ MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X,
MLX5E_MAX_FEC_SUPPORTED_LINK_MODE,
};
#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X
#define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X
+#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X
#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \
do { \
@@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev,
return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE ||
(link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) ||
- (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE &&
- MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm));
+ (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
+ MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) ||
+ (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE &&
+ MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm));
}
/* get/set FEC admin field for a given speed */
@@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x);
break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x);
+ break;
default:
return -EINVAL;
}
@@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap,
case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X:
*fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x);
break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x);
+ break;
+ case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X:
+ *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x);
+ break;
default:
return -EINVAL;
}
@@ -494,6 +525,26 @@ out:
return 0;
}
+static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode,
+ u16 conf_fec)
+{
+ /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514.
+ * For link modes up to 25G per lane, the value is kept.
+ * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514.
+ * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD.
+ */
+ if (conf_fec != BIT(MLX5E_FEC_RS_528_514))
+ return conf_fec;
+
+ if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE)
+ return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD);
+
+ if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
+ return BIT(MLX5E_FEC_RS_544_514);
+
+ return conf_fec;
+}
+
int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
{
bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm);
@@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
if (!mlx5e_is_fec_supported_link_mode(dev, i))
break;
- /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514
- * to link modes up to 25G per lane and to
- * MLX5E_FEC_RS_544_514 in the new link modes based on
- * 50G or 100G per lane
- */
- if (conf_fec == (1 << MLX5E_FEC_RS_528_514) &&
- i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE)
- conf_fec = (1 << MLX5E_FEC_RS_544_514);
+ conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec);
mlx5e_get_fec_cap_field(out, &fec_caps, i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index d1da225f35da..fa2283dd383b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -61,6 +61,7 @@ enum {
MLX5E_FEC_NOFEC,
MLX5E_FEC_FIRECODE,
MLX5E_FEC_RS_528_514,
+ MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4,
MLX5E_FEC_RS_544_514 = 7,
MLX5E_FEC_LLRS_272_257_1 = 9,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index afd654583b6b..131ed97ca997 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
- sq->clock = &mdev->clock;
+ sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->priv = c->priv;
@@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
rq->pdev = c->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index d6c12d0ea55b..2e528b2c34d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -73,11 +73,6 @@ struct mlx5e_tc_act {
bool is_terminating_action;
};
-struct mlx5e_tc_flow_action {
- unsigned int num_entries;
- struct flow_action_entry **entries;
-};
-
extern struct mlx5e_tc_act mlx5e_tc_act_drop;
extern struct mlx5e_tc_act mlx5e_tc_act_trap;
extern struct mlx5e_tc_act mlx5e_tc_act_accept;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 53ca16cb9c41..140606fcd23b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
rq->pdev = t->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 94b291662087..3cc4d55613bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv)
ts = get_cqe_ts(priv->cqe);
if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev))
- return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts);
+ return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts);
- return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts);
+ return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts);
}
static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 9240cfe25d10..d743e823362a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cae39198b4db..f9113cb13a0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT,
ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT,
ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext,
+ ETHTOOL_LINK_MODE_200000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseVR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT);
}
static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
@@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = {
[MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS,
[MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS,
+ [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS,
};
static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a814b63ed97e..2fdc86432ac0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
rq->netdev = c->netdev;
rq->priv = c->priv;
rq->tstamp = c->tstamp;
- rq->clock = &mdev->clock;
+ rq->clock = mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
rq->channel = c;
@@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
- sq->clock = &mdev->clock;
+ sq->clock = mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->mdev = c->mdev;
@@ -3816,8 +3816,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
/* MQPRIO is another toplevel qdisc that can't be attached
* simultaneously with the offloaded HTB.
*/
- if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
- return -EINVAL;
+ if (mlx5e_selq_is_htb_enabled(&priv->selq)) {
+ NL_SET_ERR_MSG_MOD(mqprio->extack,
+ "MQPRIO cannot be configured when HTB offload is enabled.");
+ return -EOPNOTSUPP;
+ }
switch (mqprio->mode) {
case TC_MQPRIO_MODE_DCB:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index bde79cac33a9..d832a12ffec0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
mlx5_del_flow_rules(lag_definer->rules[idx]);
}
j = ldev->buckets;
- };
+ }
goto destroy_fg;
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index d61a1a9297c9..65a94e46edcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -43,6 +43,8 @@
#include <linux/cpufeature.h>
#endif /* CONFIG_X86 */
+#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity)
+
enum {
MLX5_PIN_MODE_IN = 0x0,
MLX5_PIN_MODE_OUT = 0x1,
@@ -77,6 +79,56 @@ enum {
MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000,
};
+struct mlx5_clock_dev_state {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_devcom_comp_dev *compdev;
+ struct mlx5_nb pps_nb;
+ struct work_struct out_work;
+};
+
+struct mlx5_clock_priv {
+ struct mlx5_clock clock;
+ struct mlx5_core_dev *mdev;
+ struct mutex lock; /* protect mdev and used in PTP callbacks */
+ struct mlx5_core_dev *event_mdev;
+};
+
+static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock)
+{
+ return container_of(clock, struct mlx5_clock_priv, clock);
+}
+
+static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock));
+}
+
+static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock)
+{
+ mlx5_clock_lockdep_assert(clock);
+
+ return clock_priv(clock)->mdev;
+}
+
+static void mlx5_clock_lock(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ mutex_lock(&clock_priv(clock)->lock);
+}
+
+static void mlx5_clock_unlock(struct mlx5_clock *clock)
+{
+ if (!clock->shared)
+ return;
+
+ mutex_unlock(&clock_priv(clock)->lock);
+}
+
static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev)
{
return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev));
@@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
}
+static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev,
+ u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE])
+{
+ u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in),
+ out, sizeof(out), MLX5_REG_MRTCQ, 0, 0);
+ if (!err)
+ memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity),
+ MLX5_RT_CLOCK_IDENTITY_SIZE);
+
+ return err;
+}
+
static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
{
/* Optimal shift constant leads to corrections above just 1 scaled ppm.
@@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz));
}
+static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
+ MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
+ MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
+}
+
static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
+ s32 ret;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ ret = mlx5_clock_getmaxphase(mdev);
+ mlx5_clock_unlock(clock);
- return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ?
- MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX :
- MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX;
+ return ret;
}
static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta)
{
- s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info);
+ s64 max = mlx5_clock_getmaxphase(mdev);
if (delta < -max || delta > max)
return false;
@@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time,
if (real_time_mode)
*device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX));
else
- *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device);
+ *device_time = mlx5_timecounter_cyc2time(mdev->clock, device);
return 0;
}
@@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp,
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct system_time_snapshot history_begin = {0};
struct mlx5_core_dev *mdev;
+ int err;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
- if (!mlx5_is_ptm_source_time_available(mdev))
- return -EBUSY;
+ if (!mlx5_is_ptm_source_time_available(mdev)) {
+ err = -EBUSY;
+ goto unlock;
+ }
ktime_get_snapshot(&history_begin);
- return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
- &history_begin, cts);
+ err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev,
+ &history_begin, cts);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
#endif /* CONFIG_X86 */
@@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
{
struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles);
struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock);
return mlx5_read_time(mdev, NULL, false) & cc->mask;
}
@@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer;
u32 sign;
@@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
static void mlx5_pps_out(struct work_struct *work)
{
- struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
- out_work);
- struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
- pps_info);
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
- clock);
+ struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state,
+ out_work);
+ struct mlx5_core_dev *mdev = clock_state->mdev;
+ struct mlx5_clock *clock = mdev->clock;
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
unsigned long flags;
int i;
@@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
unsigned long flags;
clock = container_of(ptp_info, struct mlx5_clock, ptp_info);
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
timer = &clock->timer;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
@@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
write_sequnlock_irqrestore(&clock->lock, flags);
out:
+ mlx5_clock_unlock(clock);
return timer->overflow_period;
}
@@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
return mlx5_set_mtutc(mdev, in, sizeof(in));
}
-static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock,
+ const struct timespec64 *ts)
{
- struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_timer *timer = &clock->timer;
- struct mlx5_core_dev *mdev;
unsigned long flags;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
-
if (mlx5_modify_mtutc_allowed(mdev)) {
int err = mlx5_ptp_settime_real_time(mdev, ts);
@@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64
return 0;
}
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
+ int err;
+
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ err = mlx5_clock_settime(mdev, clock, ts);
+ mlx5_clock_unlock(clock);
+
+ return err;
+}
+
static
struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev,
struct ptp_system_timestamp *sts)
@@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct mlx5_core_dev *mdev;
u64 cycles, ns;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_real_time_mode(mdev)) {
*ts = mlx5_ptp_gettimex_real_time(mdev, sts);
goto out;
@@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
ns = mlx5_timecounter_cyc2time(clock, cycles);
*ts = ns_to_timespec64(ns);
out:
+ mlx5_clock_unlock(clock);
return 0;
}
@@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err = 0;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
- int err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
if (err)
- return err;
+ goto unlock;
}
write_seqlock_irqsave(&clock->lock, flags);
@@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
- return 0;
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
struct mlx5_core_dev *mdev;
+ int err;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ err = mlx5_ptp_adjtime_real_time(mdev, delta);
+ mlx5_clock_unlock(clock);
- return mlx5_ptp_adjtime_real_time(mdev, delta);
+ return err;
}
static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
@@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
struct mlx5_timer *timer = &clock->timer;
struct mlx5_core_dev *mdev;
unsigned long flags;
+ int err = 0;
u32 mult;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
if (mlx5_modify_mtutc_allowed(mdev)) {
- int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
+ err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
if (err)
- return err;
+ goto unlock;
}
mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm);
@@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
write_sequnlock_irqrestore(&clock->lock, flags);
ptp_schedule_worker(clock->ptp, timer->overflow_period);
- return 0;
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_extts_configure(struct ptp_clock_info *ptp,
@@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
- struct mlx5_core_dev *mdev =
- container_of(clock, struct mlx5_core_dev, clock);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ struct mlx5_core_dev *mdev;
u32 field_select = 0;
u8 pin_mode = 0;
u8 pattern = 0;
int pin = -1;
int err = 0;
- if (!MLX5_PPS_CAP(mdev))
- return -EOPNOTSUPP;
-
/* Reject requests with unsupported flags */
if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
PTP_RISING_EDGE |
@@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
field_select = MLX5_MTPPS_FS_ENABLE;
}
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+
+ if (!MLX5_PPS_CAP(mdev)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
MLX5_SET(mtpps_reg, in, pin, pin);
MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
MLX5_SET(mtpps_reg, in, pattern, pattern);
@@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
- return err;
+ goto unlock;
+
+ err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
+ if (err)
+ goto unlock;
+
+ clock->pps_info.pin_armed[pin] = on;
+ clock_priv(clock)->event_mdev = mdev;
- return mlx5_set_mtppse(mdev, pin, 0,
- MLX5_EVENT_MODE_REPETETIVE & on);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
u64 cycles_now, cycles_delta;
u64 nsec_now, nsec_delta;
struct mlx5_timer *timer;
@@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev,
struct ptp_clock_request *rq,
u32 *out_pulse_duration_ns)
{
- struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct mlx5_pps *pps_info = &mdev->clock->pps_info;
u32 out_pulse_duration;
struct timespec64 ts;
@@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo
u32 *field_select, u32 *out_pulse_duration_ns,
u64 *period, u64 *time_stamp)
{
- struct mlx5_pps *pps_info = &mdev->clock.pps_info;
+ struct mlx5_pps *pps_info = &mdev->clock->pps_info;
struct ptp_clock_time *time = &rq->perout.start;
struct timespec64 ts;
@@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
{
struct mlx5_clock *clock =
container_of(ptp, struct mlx5_clock, ptp_info);
- struct mlx5_core_dev *mdev =
- container_of(clock, struct mlx5_core_dev, clock);
- bool rt_mode = mlx5_real_time_mode(mdev);
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
u32 out_pulse_duration_ns = 0;
+ struct mlx5_core_dev *mdev;
u32 field_select = 0;
u64 npps_period = 0;
u64 time_stamp = 0;
u8 pin_mode = 0;
u8 pattern = 0;
+ bool rt_mode;
int pin = -1;
int err = 0;
- if (!MLX5_PPS_CAP(mdev))
- return -EOPNOTSUPP;
-
- /* Reject requests with unsupported flags */
- if (mlx5_perout_verify_flags(mdev, rq->perout.flags))
- return -EOPNOTSUPP;
-
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
@@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (pin < 0)
return -EBUSY;
- if (on) {
- bool rt_mode = mlx5_real_time_mode(mdev);
+ mlx5_clock_lock(clock);
+ mdev = mlx5_clock_mdev_get(clock);
+ rt_mode = mlx5_real_time_mode(mdev);
+
+ if (!MLX5_PPS_CAP(mdev)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ /* Reject requests with unsupported flags */
+ if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ if (on) {
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
- if (rt_mode && rq->perout.start.sec > U32_MAX)
- return -EINVAL;
+ if (rt_mode && rq->perout.start.sec > U32_MAX) {
+ err = -EINVAL;
+ goto unlock;
+ }
field_select |= MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
@@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
else
err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode);
if (err)
- return err;
+ goto unlock;
}
MLX5_SET(mtpps_reg, in, pin, pin);
@@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns);
err = mlx5_set_mtpps(mdev, in, sizeof(in));
if (err)
- return err;
+ goto unlock;
if (rt_mode)
- return 0;
+ goto unlock;
+
+ err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on);
- return mlx5_set_mtppse(mdev, pin, 0,
- MLX5_EVENT_MODE_REPETETIVE & on);
+unlock:
+ mlx5_clock_unlock(clock);
+ return err;
}
static int mlx5_pps_configure(struct ptp_clock_info *ptp,
@@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
mtpps_size, MLX5_REG_MTPPS, 0, 0);
}
-static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
+static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin)
{
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
-
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {};
u8 mode;
int err;
@@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
return PTP_PF_NONE;
}
-static void mlx5_init_pin_config(struct mlx5_clock *clock)
+static void mlx5_init_pin_config(struct mlx5_core_dev *mdev)
{
+ struct mlx5_clock *clock = mdev->clock;
int i;
if (!clock->ptp_info.n_pins)
@@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock)
sizeof(clock->ptp_info.pin_config[i].name),
"mlx5_pps%d", i);
clock->ptp_info.pin_config[i].index = i;
- clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
+ clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i);
clock->ptp_info.pin_config[i].chan = 0;
}
}
static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ struct mlx5_clock *clock = mdev->clock;
mlx5_query_mtpps(mdev, out, sizeof(out));
@@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev,
static int mlx5_pps_event(struct notifier_block *nb,
unsigned long type, void *data)
{
- struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state,
+ pps_nb);
+ struct mlx5_core_dev *mdev = clock_state->mdev;
+ struct mlx5_clock *clock = mdev->clock;
struct ptp_clock_event ptp_event;
struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- struct mlx5_core_dev *mdev;
unsigned long flags;
u64 ns;
- mdev = container_of(clock, struct mlx5_core_dev, clock);
-
switch (clock->ptp_info.pin_config[pin].func) {
case PTP_PF_EXTTS:
ptp_event.index = pin;
@@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb,
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
+ if (clock->shared) {
+ mlx5_core_warn(mdev, " Received unexpected PPS out event\n");
+ break;
+ }
ns = perout_conf_next_event_timer(mdev, clock);
write_seqlock_irqsave(&clock->lock, flags);
clock->pps_info.start[pin] = ns;
write_sequnlock_irqrestore(&clock->lock, flags);
- schedule_work(&clock->pps_info.out_work);
+ schedule_work(&clock_state->out_work);
break;
default:
mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
@@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb,
static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u32 dev_freq;
@@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
}
-static void mlx5_init_overflow_period(struct mlx5_clock *clock)
+static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock);
struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_timer *timer = &clock->timer;
u64 overflow_cycles;
u64 frac = 0;
@@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock)
static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
struct mlx5_ib_clock_info *info;
struct mlx5_timer *timer;
@@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
u8 log_max_freq_adjustment = 0;
@@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev)
static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
/* Configure the PHC */
clock->ptp_info = mlx5_ptp_clock_info;
@@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
mlx5_timecounter_init(mdev);
mlx5_init_clock_info(mdev);
- mlx5_init_overflow_period(clock);
+ mlx5_init_overflow_period(mdev);
if (mlx5_real_time_mode(mdev)) {
struct timespec64 ts;
ktime_get_real_ts64(&ts);
- mlx5_ptp_settime(&clock->ptp_info, &ts);
+ mlx5_clock_settime(mdev, clock, &ts);
}
}
static void mlx5_init_pps(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
-
if (!MLX5_PPS_CAP(mdev))
return;
mlx5_get_pps_caps(mdev);
- mlx5_init_pin_config(clock);
+ mlx5_init_pin_config(mdev);
}
-void mlx5_init_clock(struct mlx5_core_dev *mdev)
+static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
-
- if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
- mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
- return;
- }
+ struct mlx5_clock *clock = mdev->clock;
seqlock_init(&clock->lock);
- INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
/* Initialize the device clock */
mlx5_init_timer_clock(mdev);
@@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
mlx5_init_pps(mdev);
clock->ptp = ptp_clock_register(&clock->ptp_info,
- &mdev->pdev->dev);
+ clock->shared ? NULL : &mdev->pdev->dev);
if (IS_ERR(clock->ptp)) {
- mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n",
+ clock->shared ? "shared clock " : "",
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
- MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
- mlx5_eq_notifier_register(mdev, &clock->pps_nb);
-
if (clock->ptp)
ptp_schedule_worker(clock->ptp, 0);
}
-void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mdev->clock;
- if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
- return;
-
- mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
}
- cancel_work_sync(&clock->pps_info.out_work);
-
if (mdev->clock_info) {
free_page((unsigned long)mdev->clock_info);
mdev->clock_info = NULL;
@@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
kfree(clock->ptp_info.pin_config);
}
+
+static void mlx5_clock_free(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock);
+
+ mlx5_destroy_clock_dev(mdev);
+ mutex_destroy(&cpriv->lock);
+ kfree(cpriv);
+ mdev->clock = NULL;
+}
+
+static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared)
+{
+ struct mlx5_clock_priv *cpriv;
+ struct mlx5_clock *clock;
+
+ cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL);
+ if (!cpriv)
+ return -ENOMEM;
+
+ mutex_init(&cpriv->lock);
+ cpriv->mdev = mdev;
+ clock = &cpriv->clock;
+ clock->shared = shared;
+ mdev->clock = clock;
+ mlx5_clock_lock(clock);
+ mlx5_init_clock_dev(mdev);
+ mlx5_clock_unlock(clock);
+
+ if (!clock->shared)
+ return 0;
+
+ if (!clock->ptp) {
+ mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions");
+ mlx5_clock_free(mdev);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_devcom_comp_dev *pos;
+
+ mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc,
+ MLX5_DEVCOM_SHARED_CLOCK,
+ key, NULL, mdev);
+ if (IS_ERR(mdev->clock_state->compdev))
+ return;
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ if (next) {
+ mdev->clock = next->clock;
+ /* clock info is shared among all the functions using the same clock */
+ mdev->clock_info = next->clock_info;
+ } else {
+ mlx5_clock_alloc(mdev, true);
+ }
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+ if (!mdev->clock) {
+ mlx5_devcom_unregister_component(mdev->clock_state->compdev);
+ mdev->clock_state->compdev = NULL;
+ }
+}
+
+static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_devcom_comp_dev *pos;
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock && peer_dev != mdev) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ if (next) {
+ struct mlx5_clock_priv *cpriv = clock_priv(clock);
+
+ mlx5_clock_lock(clock);
+ if (mdev == cpriv->mdev)
+ cpriv->mdev = next;
+ mlx5_clock_unlock(clock);
+ } else {
+ mlx5_clock_free(mdev);
+ }
+
+ mdev->clock = NULL;
+ mdev->clock_info = NULL;
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+ mlx5_devcom_unregister_component(mdev->clock_state->compdev);
+}
+
+static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock,
+ struct mlx5_core_dev *new_mdev,
+ struct mlx5_core_dev *old_mdev)
+{
+ struct ptp_clock_info *ptp_info = &clock->ptp_info;
+ struct mlx5_clock_priv *cpriv = clock_priv(clock);
+ int i;
+
+ for (i = 0; i < ptp_info->n_pins; i++) {
+ if (ptp_info->pin_config[i].func != PTP_PF_EXTTS ||
+ !clock->pps_info.pin_armed[i])
+ continue;
+
+ if (new_mdev) {
+ mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE);
+ cpriv->event_mdev = new_mdev;
+ } else {
+ cpriv->event_mdev = NULL;
+ }
+
+ if (old_mdev)
+ mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE);
+ }
+}
+
+void mlx5_clock_load(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_clock_priv *cpriv;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out);
+ MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb);
+
+ if (!clock->shared) {
+ mlx5_clock_arm_pps_in_event(clock, mdev, NULL);
+ return;
+ }
+
+ cpriv = clock_priv(clock);
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_clock_lock(clock);
+ if (mdev == cpriv->mdev && mdev != cpriv->event_mdev)
+ mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev);
+ mlx5_clock_unlock(clock);
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+}
+
+void mlx5_clock_unload(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_dev, *next = NULL;
+ struct mlx5_clock *clock = mdev->clock;
+ struct mlx5_devcom_comp_dev *pos;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (!clock->shared) {
+ mlx5_clock_arm_pps_in_event(clock, NULL, mdev);
+ goto out;
+ }
+
+ mlx5_devcom_comp_lock(mdev->clock_state->compdev);
+ mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) {
+ if (peer_dev->clock && peer_dev != mdev) {
+ next = peer_dev;
+ break;
+ }
+ }
+
+ mlx5_clock_lock(clock);
+ if (mdev == clock_priv(clock)->event_mdev)
+ mlx5_clock_arm_pps_in_event(clock, next, mdev);
+ mlx5_clock_unlock(clock);
+ mlx5_devcom_comp_unlock(mdev->clock_state->compdev);
+
+out:
+ mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb);
+ cancel_work_sync(&mdev->clock_state->out_work);
+}
+
+static struct mlx5_clock null_clock;
+
+int mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE];
+ struct mlx5_clock_dev_state *clock_state;
+ u64 key;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) {
+ mdev->clock = &null_clock;
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return 0;
+ }
+
+ clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL);
+ if (!clock_state)
+ return -ENOMEM;
+ clock_state->mdev = mdev;
+ mdev->clock_state = clock_state;
+
+ if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) {
+ if (mlx5_clock_identity_get(mdev, identity)) {
+ mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n");
+ } else {
+ memcpy(&key, &identity, sizeof(key));
+ mlx5_shared_clock_register(mdev, key);
+ }
+ }
+
+ if (!mdev->clock) {
+ err = mlx5_clock_alloc(mdev, false);
+ if (err) {
+ kfree(clock_state);
+ mdev->clock_state = NULL;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (mdev->clock->shared)
+ mlx5_shared_clock_unregister(mdev);
+ else
+ mlx5_clock_free(mdev);
+ kfree(mdev->clock_state);
+ mdev->clock_state = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index bd95b9f8d143..c18a652c0faa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -33,6 +33,35 @@
#ifndef __LIB_CLOCK_H__
#define __LIB_CLOCK_H__
+#include <linux/ptp_clock_kernel.h>
+
+#define MAX_PIN_NUM 8
+struct mlx5_pps {
+ u8 pin_caps[MAX_PIN_NUM];
+ u64 start[MAX_PIN_NUM];
+ u8 enabled;
+ u64 min_npps_period;
+ u64 min_out_pulse_duration_ns;
+ bool pin_armed[MAX_PIN_NUM];
+};
+
+struct mlx5_timer {
+ struct cyclecounter cycles;
+ struct timecounter tc;
+ u32 nominal_c_mult;
+ unsigned long overflow_period;
+};
+
+struct mlx5_clock {
+ seqlock_t lock;
+ struct hwtstamp_config hwtstamp_config;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+ struct mlx5_pps pps_info;
+ struct mlx5_timer timer;
+ bool shared;
+};
+
static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev)
{
u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format);
@@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev)
typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64);
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-void mlx5_init_clock(struct mlx5_core_dev *mdev);
+int mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+void mlx5_clock_load(struct mlx5_core_dev *mdev);
+void mlx5_clock_unload(struct mlx5_core_dev *mdev);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
- return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+ return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1;
}
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
@@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
return ns_to_ktime(time);
}
#else
-static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; }
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {}
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index d58032dd0df7..c79699b94a02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -11,6 +11,7 @@ enum mlx5_devcom_component {
MLX5_DEVCOM_MPV,
MLX5_DEVCOM_HCA_PORTS,
MLX5_DEVCOM_SD_GROUP,
+ MLX5_DEVCOM_SHARED_CLOCK,
MLX5_DEVCOM_NUM_COMPONENTS,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ec956c4bcebd..710633d5fdbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
mlx5_init_reserved_gids(dev);
- mlx5_init_clock(dev);
+ err = mlx5_init_clock(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize hardware clock\n");
+ goto err_tables_cleanup;
+ }
dev->vxlan = mlx5_vxlan_create(dev);
dev->geneve = mlx5_geneve_create(dev);
@@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
err = mlx5_init_rl_table(dev);
if (err) {
mlx5_core_err(dev, "Failed to init rate limiting\n");
- goto err_tables_cleanup;
+ goto err_clock_cleanup;
}
err = mlx5_mpfs_init(dev);
@@ -1123,10 +1127,11 @@ err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
mlx5_cleanup_rl_table(dev);
-err_tables_cleanup:
+err_clock_cleanup:
mlx5_geneve_destroy(dev->geneve);
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_clock(dev);
+err_tables_cleanup:
mlx5_cleanup_reserved_gids(dev);
mlx5_cq_debugfs_cleanup(dev);
mlx5_fw_reset_cleanup(dev);
@@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_eq_table;
}
+ mlx5_clock_load(dev);
+
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
@@ -1442,6 +1449,7 @@ err_fpga_start:
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
err_eq_table:
mlx5_irq_table_destroy(dev);
@@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_reset_events_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_clock_unload(dev);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 50931584132b..3995df064101 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
[MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
[MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
[MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000,
+ [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000,
+ [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000,
+ [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000,
};
int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
index 60cb4527588a..65740bb68b09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c
@@ -516,30 +516,6 @@ def_xa_destroy:
return NULL;
}
-/* Assure synchronization of the device steering tables with updates made by SW
- * insertion.
- */
-int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
-{
- int ret = 0;
-
- if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
- mlx5dr_domain_lock(dmn);
- ret = mlx5dr_send_ring_force_drain(dmn);
- mlx5dr_domain_unlock(dmn);
- if (ret) {
- mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
- flags, ret);
- return ret;
- }
- }
-
- if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
- ret = mlx5dr_cmd_sync_steering(dmn->mdev);
-
- return ret;
-}
-
int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
{
if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
index f57c84e5128b..4fd4e8483382 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c
@@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
kfree(send_ring->sync_buff);
kfree(send_ring);
}
-
-int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
-{
- struct mlx5dr_send_ring *send_ring = dmn->send_ring;
- struct postsend_info send_info = {};
- u8 data[DR_STE_SIZE];
- int num_of_sends_req;
- int ret;
- int i;
-
- /* Sending this amount of requests makes sure we will get drain */
- num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
-
- /* Send fake requests forcing the last to be signaled */
- send_info.write.addr = (uintptr_t)data;
- send_info.write.length = DR_STE_SIZE;
- send_info.write.lkey = 0;
- /* Using the sync_mr in order to write/read */
- send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
- send_info.rkey = send_ring->sync_mr->mkey;
-
- for (i = 0; i < num_of_sends_req; i++) {
- ret = dr_postsend_icm_data(dmn, &send_info);
- if (ret)
- return ret;
- }
-
- spin_lock(&send_ring->lock);
- ret = dr_handle_pending_wc(dmn, send_ring);
- spin_unlock(&send_ring->lock);
-
- return ret;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
index 7618c6147f86..cc328292bf84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h
@@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring {
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
struct mlx5dr_send_ring *send_ring);
-int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
struct mlx5dr_ste *ste,
u8 *data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
index 0bb3724c10c2..fc8a2169d1a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h
@@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
-int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
-
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn,
u16 peer_vhca_id);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index b10f80fc651b..fa7082ee5183 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -754,9 +754,6 @@ void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev);
-bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev);
-u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev);
u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7d6d859cef3f..464821dd492d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
return NULL;
}
-bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev)
-{
- struct mlxsw_sp_rif *rif;
-
- mutex_lock(&mlxsw_sp->router->lock);
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- mutex_unlock(&mlxsw_sp->router->lock);
-
- return rif;
-}
-
-u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
-{
- struct mlxsw_sp_rif *rif;
- u16 vid = 0;
-
- mutex_lock(&mlxsw_sp->router->lock);
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- if (!rif)
- goto out;
-
- /* We only return the VID for VLAN RIFs. Otherwise we return an
- * invalid value (0).
- */
- if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
- goto out;
-
- vid = mlxsw_sp_fid_8021q_vid(rif->fid);
-
-out:
- mutex_unlock(&mlxsw_sp->router->lock);
- return vid;
-}
-
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
char ritr_pl[MLXSW_REG_RITR_LEN];
@@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
return lb_rif->common.rif_index;
}
-u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
-{
- struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
- u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
- struct mlxsw_sp_vr *ul_vr;
-
- ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
- if (WARN_ON(IS_ERR(ul_vr)))
- return 0;
-
- return ul_vr->id;
-}
-
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
return lb_rif->ul_rif_id;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 0432c7cc6b07..313efab5c324 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
-u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
index 59951b5abdb7..ac80981f67c0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
@@ -10,6 +10,40 @@
static struct dentry *fbnic_dbg_root;
+static void fbnic_dbg_desc_break(struct seq_file *s, int i)
+{
+ while (i--)
+ seq_putc(s, '-');
+
+ seq_putc(s, '\n');
+}
+
+static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v)
+{
+ struct fbnic_dev *fbd = s->private;
+ char hdr[80];
+ int i;
+
+ /* Generate Header */
+ snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n",
+ "Idx", "S", "TCAM Bitmap", "Addr/Mask");
+ seq_puts(s, hdr);
+ fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr)));
+
+ for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) {
+ struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i];
+
+ seq_printf(s, "%02d %d %64pb %pm\n",
+ i, mac_addr->state, mac_addr->act_tcam,
+ mac_addr->value.addr8);
+ seq_printf(s, " %pm\n",
+ mac_addr->mask.addr8);
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr);
+
static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v)
{
struct fbnic_dev *fbd = s->private;
@@ -48,6 +82,8 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd)
fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root);
debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd,
&fbnic_dbg_pcie_stats_fops);
+ debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd,
+ &fbnic_dbg_mac_addr_fops);
}
void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index 7a96b6ee773f..1db57c42333e 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -628,6 +628,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
fbnic_rss_key_fill(fbn->rss_key);
fbnic_rss_init_en_mask(fbn);
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
netdev->features |=
NETIF_F_RXHASH |
NETIF_F_SG |
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index fa167b1aa019..5222a035fd19 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn,
u16 length;
int rc;
- /* Valiate PF can send such a request */
+ /* Validate PF can send such a request */
if (!vf->vport_instance) {
DP_VERBOSE(p_hwfn,
QED_MSG_IOV,
@@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn,
goto out;
}
- /* Determine if the unicast filtering is acceptible by PF */
+ /* Determine if the unicast filtering is acceptable by PF */
if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
(params.type == QED_FILTER_VLAN ||
params.type == QED_FILTER_MAC_VLAN)) {
@@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn,
rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf);
if (rc) {
- DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n",
+ DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n",
vfid);
return rc;
}
@@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
/* Failure to acquire the ptt in 100g creates an odd error
- * where the first engine has already relased IOV.
+ * where the first engine has already released IOV.
*/
if (!ptt) {
DP_ERR(hwfn, "Failed to acquire ptt\n");
diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig
index 8a8ea51c639e..fe136f61586f 100644
--- a/drivers/net/ethernet/realtek/Kconfig
+++ b/drivers/net/ethernet/realtek/Kconfig
@@ -114,7 +114,8 @@ config R8169
will be called r8169. This is recommended.
config R8169_LEDS
- def_bool R8169 && LEDS_TRIGGER_NETDEV
+ bool "Support for controlling the NIC LEDs"
+ depends on R8169 && LEDS_TRIGGER_NETDEV
depends on !(R8169=y && LEDS_CLASS=m)
help
Optional support for controlling the NIC LED's with the netdev
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5a5eba49c651..7306c8e323d7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5222,6 +5222,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
new_bus->priv = tp;
new_bus->parent = &pdev->dev;
new_bus->irq[0] = PHY_MAC_INTERRUPT;
+ new_bus->phy_mask = GENMASK(31, 1);
snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x",
pci_domain_nr(pdev->bus), pci_dev_id(pdev));
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 6bf3ec985f3d..f181f05cb429 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -13,7 +13,7 @@
*/
const char *phy_speed_to_str(int speed)
{
- BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103,
+ BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121,
"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
"If a speed or mode has been added please update phy_speed_to_str "
"and the PHY settings array.\n");
@@ -169,6 +169,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ),
PHY_SETTING( 800000, FULL, 800000baseSR8_Full ),
PHY_SETTING( 800000, FULL, 800000baseVR8_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseCR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseKR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseDR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseSR4_Full ),
+ PHY_SETTING( 800000, FULL, 800000baseVR4_Full ),
/* 400G */
PHY_SETTING( 400000, FULL, 400000baseCR8_Full ),
PHY_SETTING( 400000, FULL, 400000baseKR8_Full ),
@@ -180,6 +186,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseDR4_Full ),
PHY_SETTING( 400000, FULL, 400000baseSR4_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseCR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseKR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseDR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseSR2_Full ),
+ PHY_SETTING( 400000, FULL, 400000baseVR2_Full ),
/* 200G */
PHY_SETTING( 200000, FULL, 200000baseCR4_Full ),
PHY_SETTING( 200000, FULL, 200000baseKR4_Full ),
@@ -191,6 +203,12 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseDR2_Full ),
PHY_SETTING( 200000, FULL, 200000baseSR2_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseCR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseKR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseSR_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseVR_Full ),
/* 100G */
PHY_SETTING( 100000, FULL, 100000baseCR4_Full ),
PHY_SETTING( 100000, FULL, 100000baseKR4_Full ),
diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig
index 31935f147d87..b05c2a1e9024 100644
--- a/drivers/net/phy/realtek/Kconfig
+++ b/drivers/net/phy/realtek/Kconfig
@@ -4,8 +4,12 @@ config REALTEK_PHY
help
Currently supports RTL821x/RTL822x and fast ethernet PHYs
+if REALTEK_PHY
+
config REALTEK_PHY_HWMON
- def_bool REALTEK_PHY && HWMON
- depends on !(REALTEK_PHY=y && HWMON=m)
+ bool "HWMON support for Realtek PHYs"
+ depends on HWMON && !(REALTEK_PHY=y && HWMON=m)
help
Optional hwmon support for the temperature sensor
+
+endif # REALTEK_PHY
diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c
index 572a933636b0..210fefac44d4 100644
--- a/drivers/net/phy/realtek/realtek_main.c
+++ b/drivers/net/phy/realtek/realtek_main.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/clk.h>
+#include <linux/string_choices.h>
#include "realtek.h"
@@ -422,11 +423,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n",
- val_txdly ? "Enabling" : "Disabling");
+ str_enable_disable(val_txdly));
} else {
dev_dbg(dev,
"2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n",
- val_txdly ? "enabled" : "disabled");
+ str_enabled_disabled(val_txdly));
}
ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY,
@@ -437,11 +438,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
} else if (ret) {
dev_dbg(dev,
"%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n",
- val_rxdly ? "Enabling" : "Disabling");
+ str_enable_disable(val_rxdly));
} else {
dev_dbg(dev,
"2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n",
- val_rxdly ? "enabled" : "disabled");
+ str_enabled_disabled(val_rxdly));
}
if (priv->has_phycr2) {
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 92516189e792..03175419bf28 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
be32_to_cpu(fdb->vni)))
goto nla_put_failure;
- ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+ ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used));
ci.ndm_confirmed = 0;
- ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+ ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated));
ci.ndm_refcnt = 0;
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
@@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
f = __vxlan_find_mac(vxlan, mac, vni);
- if (f && f->used != jiffies)
- f->used = jiffies;
+ if (f) {
+ unsigned long now = jiffies;
+
+ if (READ_ONCE(f->used) != now)
+ WRITE_ONCE(f->used, now);
+ }
return f;
}
@@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
!(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
if (f->state != state) {
f->state = state;
- f->updated = jiffies;
notify = 1;
}
if (f->flags != fdb_flags) {
f->flags = fdb_flags;
- f->updated = jiffies;
notify = 1;
}
}
@@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
}
if (ndm_flags & NTF_USE)
- f->used = jiffies;
+ WRITE_ONCE(f->updated, jiffies);
if (notify) {
if (rd == NULL)
rd = first_remote_rtnl(f);
+ WRITE_ONCE(f->updated, jiffies);
err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
swdev_notify, extack);
if (err)
@@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
struct vxlan_fdb *f;
int err = -ENOENT;
- f = vxlan_find_mac(vxlan, addr, src_vni);
+ f = __vxlan_find_mac(vxlan, addr, src_vni);
if (!f)
return err;
@@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
ifindex = src_ifindex;
#endif
- f = vxlan_find_mac(vxlan, src_mac, vni);
+ f = __vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
+ unsigned long now = jiffies;
+
+ if (READ_ONCE(f->updated) != now)
+ WRITE_ONCE(f->updated, now);
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
rdst->remote_ifindex == ifindex))
@@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
src_mac, &rdst->remote_ip.sa, &src_ip->sa);
rdst->remote_ip = *src_ip;
- f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
@@ -2852,7 +2858,7 @@ static void vxlan_cleanup(struct timer_list *t)
if (f->flags & NTF_EXT_LEARNED)
continue;
- timeout = f->used + vxlan->cfg.age_interval * HZ;
+ timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ;
if (time_before_eq(timeout, jiffies)) {
netdev_dbg(vxlan->dev,
"garbage collect %pM\n",
@@ -4768,7 +4774,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev,
spin_lock_bh(&vxlan->hash_lock[hash_index]);
- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
goto out;
@@ -4824,7 +4830,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev,
hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
spin_lock_bh(&vxlan->hash_lock[hash_index]);
- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
err = -ENOENT;
else if (f->flags & NTF_EXT_LEARNED)
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index c61e6427384c..9eb9e3c49f81 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -2,15 +2,6 @@
menu "S/390 network device drivers"
depends on NETDEVICES && S390
-config LCS
- def_tristate m
- prompt "Lan Channel Station Interface"
- depends on CCW && NETDEVICES && ETHERNET
- help
- Select this option if you want to use LCS networking on IBM System z.
- To compile as a module, choose M. The module name is lcs.
- If you do not use LCS, choose N.
-
config CTCM
def_tristate m
prompt "CTC and MPC SNA device support"
@@ -98,7 +89,7 @@ config QETH_OSX
config CCWGROUP
tristate
- default (LCS || CTCM || QETH || SMC)
+ default (CTCM || QETH || SMC)
config ISM
tristate "Support for ISM vPCI Adapter"
diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile
index bc55ec316adb..b5aaba290127 100644
--- a/drivers/s390/net/Makefile
+++ b/drivers/s390/net/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o
obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o
obj-$(CONFIG_SMSGIUCV) += smsgiucv.o
obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o
-obj-$(CONFIG_LCS) += lcs.o
qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o
obj-$(CONFIG_QETH) += qeth.o
qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
deleted file mode 100644
index 88db8378325a..000000000000
--- a/drivers/s390/net/lcs.c
+++ /dev/null
@@ -1,2385 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Linux for S/390 LAN channel station device driver
- *
- * Copyright IBM Corp. 1999, 2009
- * Author(s): Original Code written by
- * DJ Barrow <djbarrow@de.ibm.com,barrow_dj@yahoo.com>
- * Rewritten by
- * Frank Pavlic <fpavlic@de.ibm.com> and
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#define KMSG_COMPONENT "lcs"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/in.h>
-#include <linux/igmp.h>
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <net/arp.h>
-#include <net/ip.h>
-
-#include <asm/debug.h>
-#include <asm/idals.h>
-#include <asm/timex.h>
-#include <linux/device.h>
-#include <asm/ccwgroup.h>
-
-#include "lcs.h"
-
-
-/*
- * initialization string for output
- */
-
-static char version[] __initdata = "LCS driver";
-
-/*
- * the root device for lcs group devices
- */
-static struct device *lcs_root_dev;
-
-/*
- * Some prototypes.
- */
-static void lcs_tasklet(unsigned long);
-static void lcs_start_kernel_thread(struct work_struct *);
-static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *);
-#ifdef CONFIG_IP_MULTICAST
-static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *);
-#endif /* CONFIG_IP_MULTICAST */
-static int lcs_recovery(void *ptr);
-
-/*
- * Debug Facility Stuff
- */
-static char debug_buffer[255];
-static debug_info_t *lcs_dbf_setup;
-static debug_info_t *lcs_dbf_trace;
-
-/*
- * LCS Debug Facility functions
- */
-static void
-lcs_unregister_debug_facility(void)
-{
- debug_unregister(lcs_dbf_setup);
- debug_unregister(lcs_dbf_trace);
-}
-
-static int
-lcs_register_debug_facility(void)
-{
- lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8);
- lcs_dbf_trace = debug_register("lcs_trace", 4, 1, 8);
- if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) {
- pr_err("Not enough memory for debug facility.\n");
- lcs_unregister_debug_facility();
- return -ENOMEM;
- }
- debug_register_view(lcs_dbf_setup, &debug_hex_ascii_view);
- debug_set_level(lcs_dbf_setup, 2);
- debug_register_view(lcs_dbf_trace, &debug_hex_ascii_view);
- debug_set_level(lcs_dbf_trace, 2);
- return 0;
-}
-
-/*
- * Allocate io buffers.
- */
-static int
-lcs_alloc_channel(struct lcs_channel *channel)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ichalloc");
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- /* alloc memory fo iobuffer */
- channel->iob[cnt].data =
- kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL);
- if (channel->iob[cnt].data == NULL)
- break;
- channel->iob[cnt].state = LCS_BUF_STATE_EMPTY;
- }
- if (cnt < LCS_NUM_BUFFS) {
- /* Not all io buffers could be allocated. */
- LCS_DBF_TEXT(2, setup, "echalloc");
- while (cnt-- > 0)
- kfree(channel->iob[cnt].data);
- return -ENOMEM;
- }
- return 0;
-}
-
-/*
- * Free io buffers.
- */
-static void
-lcs_free_channel(struct lcs_channel *channel)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ichfree");
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- kfree(channel->iob[cnt].data);
- channel->iob[cnt].data = NULL;
- }
-}
-
-/*
- * Cleanup channel.
- */
-static void
-lcs_cleanup_channel(struct lcs_channel *channel)
-{
- LCS_DBF_TEXT(3, setup, "cleanch");
- /* Kill write channel tasklets. */
- tasklet_kill(&channel->irq_tasklet);
- /* Free channel buffers. */
- lcs_free_channel(channel);
-}
-
-/*
- * LCS free memory for card and channels.
- */
-static void
-lcs_free_card(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, setup, "remcard");
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
- kfree(card);
-}
-
-/*
- * LCS alloc memory for card and channels
- */
-static struct lcs_card *
-lcs_alloc_card(void)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, setup, "alloclcs");
-
- card = kzalloc(sizeof(struct lcs_card), GFP_KERNEL | GFP_DMA);
- if (card == NULL)
- return NULL;
- card->lan_type = LCS_FRAME_TYPE_AUTO;
- card->pkt_seq = 0;
- card->lancmd_timeout = LCS_LANCMD_TIMEOUT_DEFAULT;
- /* Allocate io buffers for the read channel. */
- rc = lcs_alloc_channel(&card->read);
- if (rc){
- LCS_DBF_TEXT(2, setup, "iccwerr");
- lcs_free_card(card);
- return NULL;
- }
- /* Allocate io buffers for the write channel. */
- rc = lcs_alloc_channel(&card->write);
- if (rc) {
- LCS_DBF_TEXT(2, setup, "iccwerr");
- lcs_cleanup_channel(&card->read);
- lcs_free_card(card);
- return NULL;
- }
-
-#ifdef CONFIG_IP_MULTICAST
- INIT_LIST_HEAD(&card->ipm_list);
-#endif
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
- return card;
-}
-
-/*
- * Setup read channel.
- */
-static void
-lcs_setup_read_ccws(struct lcs_card *card)
-{
- int cnt;
-
- LCS_DBF_TEXT(2, setup, "ireadccw");
- /* Setup read ccws. */
- memset(card->read.ccws, 0, sizeof (struct ccw1) * (LCS_NUM_BUFFS + 1));
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- card->read.ccws[cnt].cmd_code = LCS_CCW_READ;
- card->read.ccws[cnt].count = LCS_IOBUFFERSIZE;
- card->read.ccws[cnt].flags =
- CCW_FLAG_CC | CCW_FLAG_SLI | CCW_FLAG_PCI;
- /*
- * Note: we have allocated the buffer with GFP_DMA, so
- * we do not need to do set_normalized_cda.
- */
- card->read.ccws[cnt].cda =
- virt_to_dma32(card->read.iob[cnt].data);
- ((struct lcs_header *)
- card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET;
- card->read.iob[cnt].callback = lcs_get_frames_cb;
- card->read.iob[cnt].state = LCS_BUF_STATE_READY;
- card->read.iob[cnt].count = LCS_IOBUFFERSIZE;
- }
- card->read.ccws[0].flags &= ~CCW_FLAG_PCI;
- card->read.ccws[LCS_NUM_BUFFS - 1].flags &= ~CCW_FLAG_PCI;
- card->read.ccws[LCS_NUM_BUFFS - 1].flags |= CCW_FLAG_SUSPEND;
- /* Last ccw is a tic (transfer in channel). */
- card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER;
- card->read.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->read.ccws);
- /* Setg initial state of the read channel. */
- card->read.state = LCS_CH_STATE_INIT;
-
- card->read.io_idx = 0;
- card->read.buf_idx = 0;
-}
-
-static void
-lcs_setup_read(struct lcs_card *card)
-{
- LCS_DBF_TEXT(3, setup, "initread");
-
- lcs_setup_read_ccws(card);
- /* Initialize read channel tasklet. */
- card->read.irq_tasklet.data = (unsigned long) &card->read;
- card->read.irq_tasklet.func = lcs_tasklet;
- /* Initialize waitqueue. */
- init_waitqueue_head(&card->read.wait_q);
-}
-
-/*
- * Setup write channel.
- */
-static void
-lcs_setup_write_ccws(struct lcs_card *card)
-{
- int cnt;
-
- LCS_DBF_TEXT(3, setup, "iwritccw");
- /* Setup write ccws. */
- memset(card->write.ccws, 0, sizeof(struct ccw1) * (LCS_NUM_BUFFS + 1));
- for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) {
- card->write.ccws[cnt].cmd_code = LCS_CCW_WRITE;
- card->write.ccws[cnt].count = 0;
- card->write.ccws[cnt].flags =
- CCW_FLAG_SUSPEND | CCW_FLAG_CC | CCW_FLAG_SLI;
- /*
- * Note: we have allocated the buffer with GFP_DMA, so
- * we do not need to do set_normalized_cda.
- */
- card->write.ccws[cnt].cda =
- virt_to_dma32(card->write.iob[cnt].data);
- }
- /* Last ccw is a tic (transfer in channel). */
- card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER;
- card->write.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->write.ccws);
- /* Set initial state of the write channel. */
- card->read.state = LCS_CH_STATE_INIT;
-
- card->write.io_idx = 0;
- card->write.buf_idx = 0;
-}
-
-static void
-lcs_setup_write(struct lcs_card *card)
-{
- LCS_DBF_TEXT(3, setup, "initwrit");
-
- lcs_setup_write_ccws(card);
- /* Initialize write channel tasklet. */
- card->write.irq_tasklet.data = (unsigned long) &card->write;
- card->write.irq_tasklet.func = lcs_tasklet;
- /* Initialize waitqueue. */
- init_waitqueue_head(&card->write.wait_q);
-}
-
-static void
-lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- card->thread_allowed_mask = threads;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- wake_up(&card->wait_q);
-}
-static int lcs_threads_running(struct lcs_card *card, unsigned long threads)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- rc = (card->thread_running_mask & threads);
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-static int
-lcs_wait_for_threads(struct lcs_card *card, unsigned long threads)
-{
- return wait_event_interruptible(card->wait_q,
- lcs_threads_running(card, threads) == 0);
-}
-
-static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- if ( !(card->thread_allowed_mask & thread) ||
- (card->thread_start_mask & thread) ) {
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return -EPERM;
- }
- card->thread_start_mask |= thread;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return 0;
-}
-
-static void
-lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- card->thread_running_mask &= ~thread;
- spin_unlock_irqrestore(&card->mask_lock, flags);
- wake_up(&card->wait_q);
-}
-
-static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- if (card->thread_start_mask & thread){
- if ((card->thread_allowed_mask & thread) &&
- !(card->thread_running_mask & thread)){
- rc = 1;
- card->thread_start_mask &= ~thread;
- card->thread_running_mask |= thread;
- } else
- rc = -EPERM;
- }
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-static int
-lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
-{
- int rc = 0;
- wait_event(card->wait_q,
- (rc = __lcs_do_run_thread(card, thread)) >= 0);
- return rc;
-}
-
-static int
-lcs_do_start_thread(struct lcs_card *card, unsigned long thread)
-{
- unsigned long flags;
- int rc = 0;
-
- spin_lock_irqsave(&card->mask_lock, flags);
- LCS_DBF_TEXT_(4, trace, " %02x%02x%02x",
- (u8) card->thread_start_mask,
- (u8) card->thread_allowed_mask,
- (u8) card->thread_running_mask);
- rc = (card->thread_start_mask & thread);
- spin_unlock_irqrestore(&card->mask_lock, flags);
- return rc;
-}
-
-/*
- * Initialize channels,card and state machines.
- */
-static void
-lcs_setup_card(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, setup, "initcard");
- LCS_DBF_HEX(2, setup, &card, sizeof(void*));
-
- lcs_setup_read(card);
- lcs_setup_write(card);
- /* Set cards initial state. */
- card->state = DEV_STATE_DOWN;
- card->tx_buffer = NULL;
- card->tx_emitted = 0;
-
- init_waitqueue_head(&card->wait_q);
- spin_lock_init(&card->lock);
- spin_lock_init(&card->ipm_lock);
- spin_lock_init(&card->mask_lock);
-#ifdef CONFIG_IP_MULTICAST
- INIT_LIST_HEAD(&card->ipm_list);
-#endif
- INIT_LIST_HEAD(&card->lancmd_waiters);
-}
-
-static void lcs_clear_multicast_list(struct lcs_card *card)
-{
-#ifdef CONFIG_IP_MULTICAST
- struct lcs_ipm_list *ipm;
- unsigned long flags;
-
- /* Free multicast list. */
- LCS_DBF_TEXT(3, setup, "clmclist");
- spin_lock_irqsave(&card->ipm_lock, flags);
- while (!list_empty(&card->ipm_list)){
- ipm = list_entry(card->ipm_list.next,
- struct lcs_ipm_list, list);
- list_del(&ipm->list);
- if (ipm->ipm_state != LCS_IPM_STATE_SET_REQUIRED){
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- lcs_send_delipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- }
- kfree(ipm);
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-#endif
-}
-
-/*
- * Cleanup channels,card and state machines.
- */
-static void
-lcs_cleanup_card(struct lcs_card *card)
-{
-
- LCS_DBF_TEXT(3, setup, "cleancrd");
- LCS_DBF_HEX(2,setup,&card,sizeof(void*));
-
- if (card->dev != NULL)
- free_netdev(card->dev);
- /* Cleanup channels. */
- lcs_cleanup_channel(&card->write);
- lcs_cleanup_channel(&card->read);
-}
-
-/*
- * Start channel.
- */
-static int
-lcs_start_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT_(4, trace,"ssch%s", dev_name(&channel->ccwdev->dev));
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_start(channel->ccwdev,
- channel->ccws + channel->io_idx, 0, 0,
- DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND);
- if (rc == 0)
- channel->state = LCS_CH_STATE_RUNNING;
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4,trace,"essh%s",
- dev_name(&channel->ccwdev->dev));
- dev_err(&channel->ccwdev->dev,
- "Starting an LCS device resulted in an error,"
- " rc=%d!\n", rc);
- }
- return rc;
-}
-
-static int
-lcs_clear_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4,trace,"clearch");
- LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev));
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_clear(channel->ccwdev, 0);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ecsc%s",
- dev_name(&channel->ccwdev->dev));
- return rc;
- }
- wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED));
- channel->state = LCS_CH_STATE_STOPPED;
- return rc;
-}
-
-
-/*
- * Stop channel.
- */
-static int
-lcs_stop_channel(struct lcs_channel *channel)
-{
- unsigned long flags;
- int rc;
-
- if (channel->state == LCS_CH_STATE_STOPPED)
- return 0;
- LCS_DBF_TEXT(4,trace,"haltsch");
- LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev));
- channel->state = LCS_CH_STATE_INIT;
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- rc = ccw_device_halt(channel->ccwdev, 0);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ehsc%s",
- dev_name(&channel->ccwdev->dev));
- return rc;
- }
- /* Asynchronous halt initialted. Wait for its completion. */
- wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED));
- lcs_clear_channel(channel);
- return 0;
-}
-
-/*
- * start read and write channel
- */
-static int
-lcs_start_channels(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(2, trace, "chstart");
- /* start read channel */
- rc = lcs_start_channel(&card->read);
- if (rc)
- return rc;
- /* start write channel */
- rc = lcs_start_channel(&card->write);
- if (rc)
- lcs_stop_channel(&card->read);
- return rc;
-}
-
-/*
- * stop read and write channel
- */
-static int
-lcs_stop_channels(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, trace, "chhalt");
- lcs_stop_channel(&card->read);
- lcs_stop_channel(&card->write);
- return 0;
-}
-
-/*
- * Get empty buffer.
- */
-static struct lcs_buffer *
-__lcs_get_buffer(struct lcs_channel *channel)
-{
- int index;
-
- LCS_DBF_TEXT(5, trace, "_getbuff");
- index = channel->io_idx;
- do {
- if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) {
- channel->iob[index].state = LCS_BUF_STATE_LOCKED;
- return channel->iob + index;
- }
- index = (index + 1) & (LCS_NUM_BUFFS - 1);
- } while (index != channel->io_idx);
- return NULL;
-}
-
-static struct lcs_buffer *
-lcs_get_buffer(struct lcs_channel *channel)
-{
- struct lcs_buffer *buffer;
- unsigned long flags;
-
- LCS_DBF_TEXT(5, trace, "getbuff");
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer = __lcs_get_buffer(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- return buffer;
-}
-
-/*
- * Resume channel program if the channel is suspended.
- */
-static int
-__lcs_resume_channel(struct lcs_channel *channel)
-{
- int rc;
-
- if (channel->state != LCS_CH_STATE_SUSPENDED)
- return 0;
- if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND)
- return 0;
- LCS_DBF_TEXT_(5, trace, "rsch%s", dev_name(&channel->ccwdev->dev));
- rc = ccw_device_resume(channel->ccwdev);
- if (rc) {
- LCS_DBF_TEXT_(4, trace, "ersc%s",
- dev_name(&channel->ccwdev->dev));
- dev_err(&channel->ccwdev->dev,
- "Sending data from the LCS device to the LAN failed"
- " with rc=%d\n",rc);
- } else
- channel->state = LCS_CH_STATE_RUNNING;
- return rc;
-
-}
-
-/*
- * Make a buffer ready for processing.
- */
-static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index)
-{
- int prev, next;
-
- LCS_DBF_TEXT(5, trace, "rdybits");
- prev = (index - 1) & (LCS_NUM_BUFFS - 1);
- next = (index + 1) & (LCS_NUM_BUFFS - 1);
- /* Check if we may clear the suspend bit of this buffer. */
- if (channel->ccws[next].flags & CCW_FLAG_SUSPEND) {
- /* Check if we have to set the PCI bit. */
- if (!(channel->ccws[prev].flags & CCW_FLAG_SUSPEND))
- /* Suspend bit of the previous buffer is not set. */
- channel->ccws[index].flags |= CCW_FLAG_PCI;
- /* Suspend bit of the next buffer is set. */
- channel->ccws[index].flags &= ~CCW_FLAG_SUSPEND;
- }
-}
-
-static int
-lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- unsigned long flags;
- int index, rc;
-
- LCS_DBF_TEXT(5, trace, "rdybuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED &&
- buffer->state != LCS_BUF_STATE_PROCESSED);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer->state = LCS_BUF_STATE_READY;
- index = buffer - channel->iob;
- /* Set length. */
- channel->ccws[index].count = buffer->count;
- /* Check relevant PCI/suspend bits. */
- __lcs_ready_buffer_bits(channel, index);
- rc = __lcs_resume_channel(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
- return rc;
-}
-
-/*
- * Mark the buffer as processed. Take care of the suspend bit
- * of the previous buffer. This function is called from
- * interrupt context, so the lock must not be taken.
- */
-static int
-__lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- int index, prev, next;
-
- LCS_DBF_TEXT(5, trace, "prcsbuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_READY);
- buffer->state = LCS_BUF_STATE_PROCESSED;
- index = buffer - channel->iob;
- prev = (index - 1) & (LCS_NUM_BUFFS - 1);
- next = (index + 1) & (LCS_NUM_BUFFS - 1);
- /* Set the suspend bit and clear the PCI bit of this buffer. */
- channel->ccws[index].flags |= CCW_FLAG_SUSPEND;
- channel->ccws[index].flags &= ~CCW_FLAG_PCI;
- /* Check the suspend bit of the previous buffer. */
- if (channel->iob[prev].state == LCS_BUF_STATE_READY) {
- /*
- * Previous buffer is in state ready. It might have
- * happened in lcs_ready_buffer that the suspend bit
- * has not been cleared to avoid an endless loop.
- * Do it now.
- */
- __lcs_ready_buffer_bits(channel, prev);
- }
- /* Clear PCI bit of next buffer. */
- channel->ccws[next].flags &= ~CCW_FLAG_PCI;
- return __lcs_resume_channel(channel);
-}
-
-/*
- * Put a processed buffer back to state empty.
- */
-static void
-lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- unsigned long flags;
-
- LCS_DBF_TEXT(5, trace, "relbuff");
- BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED &&
- buffer->state != LCS_BUF_STATE_PROCESSED);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- buffer->state = LCS_BUF_STATE_EMPTY;
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
-}
-
-/*
- * Get buffer for a lan command.
- */
-static struct lcs_buffer *
-lcs_get_lancmd(struct lcs_card *card, int count)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(4, trace, "getlncmd");
- /* Get buffer and wait if none is available. */
- wait_event(card->write.wait_q,
- ((buffer = lcs_get_buffer(&card->write)) != NULL));
- count += sizeof(struct lcs_header);
- *(__u16 *)(buffer->data + count) = 0;
- buffer->count = count + sizeof(__u16);
- buffer->callback = lcs_release_buffer;
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->offset = count;
- cmd->type = LCS_FRAME_TYPE_CONTROL;
- cmd->slot = 0;
- return buffer;
-}
-
-
-static void
-lcs_get_reply(struct lcs_reply *reply)
-{
- refcount_inc(&reply->refcnt);
-}
-
-static void
-lcs_put_reply(struct lcs_reply *reply)
-{
- if (refcount_dec_and_test(&reply->refcnt))
- kfree(reply);
-}
-
-static struct lcs_reply *
-lcs_alloc_reply(struct lcs_cmd *cmd)
-{
- struct lcs_reply *reply;
-
- LCS_DBF_TEXT(4, trace, "getreply");
-
- reply = kzalloc(sizeof(struct lcs_reply), GFP_ATOMIC);
- if (!reply)
- return NULL;
- refcount_set(&reply->refcnt, 1);
- reply->sequence_no = cmd->sequence_no;
- reply->received = 0;
- reply->rc = 0;
- init_waitqueue_head(&reply->wait_q);
-
- return reply;
-}
-
-/*
- * Notifier function for lancmd replies. Called from read irq.
- */
-static void
-lcs_notify_lancmd_waiters(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- struct list_head *l, *n;
- struct lcs_reply *reply;
-
- LCS_DBF_TEXT(4, trace, "notiwait");
- spin_lock(&card->lock);
- list_for_each_safe(l, n, &card->lancmd_waiters) {
- reply = list_entry(l, struct lcs_reply, list);
- if (reply->sequence_no == cmd->sequence_no) {
- lcs_get_reply(reply);
- list_del_init(&reply->list);
- if (reply->callback != NULL)
- reply->callback(card, cmd);
- reply->received = 1;
- reply->rc = cmd->return_code;
- wake_up(&reply->wait_q);
- lcs_put_reply(reply);
- break;
- }
- }
- spin_unlock(&card->lock);
-}
-
-/*
- * Emit buffer of a lan command.
- */
-static void
-lcs_lancmd_timeout(struct timer_list *t)
-{
- struct lcs_reply *reply = from_timer(reply, t, timer);
- struct lcs_reply *list_reply, *r;
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "timeout");
- spin_lock_irqsave(&reply->card->lock, flags);
- list_for_each_entry_safe(list_reply, r,
- &reply->card->lancmd_waiters,list) {
- if (reply == list_reply) {
- lcs_get_reply(reply);
- list_del_init(&reply->list);
- spin_unlock_irqrestore(&reply->card->lock, flags);
- reply->received = 1;
- reply->rc = -ETIME;
- wake_up(&reply->wait_q);
- lcs_put_reply(reply);
- return;
- }
- }
- spin_unlock_irqrestore(&reply->card->lock, flags);
-}
-
-static int
-lcs_send_lancmd(struct lcs_card *card, struct lcs_buffer *buffer,
- void (*reply_callback)(struct lcs_card *, struct lcs_cmd *))
-{
- struct lcs_reply *reply;
- struct lcs_cmd *cmd;
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4, trace, "sendcmd");
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->return_code = 0;
- cmd->sequence_no = card->sequence_no++;
- reply = lcs_alloc_reply(cmd);
- if (!reply)
- return -ENOMEM;
- reply->callback = reply_callback;
- reply->card = card;
- spin_lock_irqsave(&card->lock, flags);
- list_add_tail(&reply->list, &card->lancmd_waiters);
- spin_unlock_irqrestore(&card->lock, flags);
-
- buffer->callback = lcs_release_buffer;
- rc = lcs_ready_buffer(&card->write, buffer);
- if (rc)
- return rc;
- timer_setup(&reply->timer, lcs_lancmd_timeout, 0);
- mod_timer(&reply->timer, jiffies + HZ * card->lancmd_timeout);
- wait_event(reply->wait_q, reply->received);
- del_timer_sync(&reply->timer);
- LCS_DBF_TEXT_(4, trace, "rc:%d",reply->rc);
- rc = reply->rc;
- lcs_put_reply(reply);
- return rc ? -EIO : 0;
-}
-
-/*
- * LCS startup command
- */
-static int
-lcs_send_startup(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "startup");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STARTUP;
- cmd->initiator = initiator;
- cmd->cmd.lcs_startup.buff_size = LCS_IOBUFFERSIZE;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * LCS shutdown command
- */
-static int
-lcs_send_shutdown(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "shutdown");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_SHUTDOWN;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * LCS lanstat command
- */
-static void
-__lcs_lanstat_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "statcb");
- memcpy(card->mac, cmd->cmd.lcs_lanstat_cmd.mac_addr, LCS_MAC_LENGTH);
-}
-
-static int
-lcs_send_lanstat(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2,trace, "cmdstat");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- /* Setup lanstat command. */
- cmd->cmd_code = LCS_CMD_LANSTAT;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, __lcs_lanstat_cb);
-}
-
-/*
- * send stoplan command
- */
-static int
-lcs_send_stoplan(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdstpln");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STOPLAN;
- cmd->initiator = initiator;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * send startlan command
- */
-static void
-__lcs_send_startlan_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "srtlancb");
- card->lan_type = cmd->cmd.lcs_std_cmd.lan_type;
- card->portno = cmd->cmd.lcs_std_cmd.portno;
-}
-
-static int
-lcs_send_startlan(struct lcs_card *card, __u8 initiator)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdstaln");
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_STARTLAN;
- cmd->initiator = initiator;
- cmd->cmd.lcs_std_cmd.lan_type = card->lan_type;
- cmd->cmd.lcs_std_cmd.portno = card->portno;
- return lcs_send_lancmd(card, buffer, __lcs_send_startlan_cb);
-}
-
-#ifdef CONFIG_IP_MULTICAST
-/*
- * send setipm command (Multicast)
- */
-static int
-lcs_send_setipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmdsetim");
- buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_SETIPM;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair,
- &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair));
- LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr);
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * send delipm command (Multicast)
- */
-static int
-lcs_send_delipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
-
- LCS_DBF_TEXT(2, trace, "cmddelim");
- buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_DELIPM;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair,
- &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair));
- LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr);
- return lcs_send_lancmd(card, buffer, NULL);
-}
-
-/*
- * check if multicast is supported by LCS
- */
-static void
-__lcs_check_multicast_cb(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(2, trace, "chkmccb");
- card->ip_assists_supported =
- cmd->cmd.lcs_qipassist.ip_assists_supported;
- card->ip_assists_enabled =
- cmd->cmd.lcs_qipassist.ip_assists_enabled;
-}
-
-static int
-lcs_check_multicast_support(struct lcs_card *card)
-{
- struct lcs_buffer *buffer;
- struct lcs_cmd *cmd;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "cmdqipa");
- /* Send query ipassist. */
- buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE);
- cmd = (struct lcs_cmd *) buffer->data;
- cmd->cmd_code = LCS_CMD_QIPASSIST;
- cmd->initiator = LCS_INITIATOR_TCPIP;
- cmd->cmd.lcs_qipassist.lan_type = card->lan_type;
- cmd->cmd.lcs_qipassist.portno = card->portno;
- cmd->cmd.lcs_qipassist.version = 4;
- cmd->cmd.lcs_qipassist.num_ip_pairs = 1;
- rc = lcs_send_lancmd(card, buffer, __lcs_check_multicast_cb);
- if (rc != 0) {
- pr_err("Query IPAssist failed. Assuming unsupported!\n");
- return -EOPNOTSUPP;
- }
- if (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT)
- return 0;
- return -EOPNOTSUPP;
-}
-
-/*
- * set or del multicast address on LCS card
- */
-static void
-lcs_fix_multicast_list(struct lcs_card *card)
-{
- struct list_head failed_list;
- struct lcs_ipm_list *ipm, *tmp;
- unsigned long flags;
- int rc;
-
- LCS_DBF_TEXT(4,trace, "fixipm");
- INIT_LIST_HEAD(&failed_list);
- spin_lock_irqsave(&card->ipm_lock, flags);
-list_modified:
- list_for_each_entry_safe(ipm, tmp, &card->ipm_list, list){
- switch (ipm->ipm_state) {
- case LCS_IPM_STATE_SET_REQUIRED:
- /* del from ipm_list so no one else can tamper with
- * this entry */
- list_del_init(&ipm->list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- rc = lcs_send_setipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- if (rc) {
- pr_info("Adding multicast address failed."
- " Table possibly full!\n");
- /* store ipm in failed list -> will be added
- * to ipm_list again, so a retry will be done
- * during the next call of this function */
- list_add_tail(&ipm->list, &failed_list);
- } else {
- ipm->ipm_state = LCS_IPM_STATE_ON_CARD;
- /* re-insert into ipm_list */
- list_add_tail(&ipm->list, &card->ipm_list);
- }
- goto list_modified;
- case LCS_IPM_STATE_DEL_REQUIRED:
- list_del(&ipm->list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- lcs_send_delipm(card, ipm);
- spin_lock_irqsave(&card->ipm_lock, flags);
- kfree(ipm);
- goto list_modified;
- case LCS_IPM_STATE_ON_CARD:
- break;
- }
- }
- /* re-insert all entries from the failed_list into ipm_list */
- list_for_each_entry_safe(ipm, tmp, &failed_list, list)
- list_move_tail(&ipm->list, &card->ipm_list);
-
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-}
-
-/*
- * get mac address for the relevant Multicast address
- */
-static void
-lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev)
-{
- LCS_DBF_TEXT(4,trace, "getmac");
- ip_eth_mc_map(ipm, mac);
-}
-
-/*
- * function called by net device to handle multicast address relevant things
- */
-static void lcs_remove_mc_addresses(struct lcs_card *card,
- struct in_device *in4_dev)
-{
- struct ip_mc_list *im4;
- struct list_head *l;
- struct lcs_ipm_list *ipm;
- unsigned long flags;
- char buf[MAX_ADDR_LEN];
-
- LCS_DBF_TEXT(4, trace, "remmclst");
- spin_lock_irqsave(&card->ipm_lock, flags);
- list_for_each(l, &card->ipm_list) {
- ipm = list_entry(l, struct lcs_ipm_list, list);
- for (im4 = rcu_dereference(in4_dev->mc_list);
- im4 != NULL; im4 = rcu_dereference(im4->next_rcu)) {
- lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev);
- if ( (ipm->ipm.ip_addr == im4->multiaddr) &&
- (memcmp(buf, &ipm->ipm.mac_addr,
- LCS_MAC_LENGTH) == 0) )
- break;
- }
- if (im4 == NULL)
- ipm->ipm_state = LCS_IPM_STATE_DEL_REQUIRED;
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
-}
-
-static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card,
- struct ip_mc_list *im4,
- char *buf)
-{
- struct lcs_ipm_list *tmp, *ipm = NULL;
- struct list_head *l;
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "chkmcent");
- spin_lock_irqsave(&card->ipm_lock, flags);
- list_for_each(l, &card->ipm_list) {
- tmp = list_entry(l, struct lcs_ipm_list, list);
- if ( (tmp->ipm.ip_addr == im4->multiaddr) &&
- (memcmp(buf, &tmp->ipm.mac_addr,
- LCS_MAC_LENGTH) == 0) ) {
- ipm = tmp;
- break;
- }
- }
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- return ipm;
-}
-
-static void lcs_set_mc_addresses(struct lcs_card *card,
- struct in_device *in4_dev)
-{
-
- struct ip_mc_list *im4;
- struct lcs_ipm_list *ipm;
- char buf[MAX_ADDR_LEN];
- unsigned long flags;
-
- LCS_DBF_TEXT(4, trace, "setmclst");
- for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL;
- im4 = rcu_dereference(im4->next_rcu)) {
- lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev);
- ipm = lcs_check_addr_entry(card, im4, buf);
- if (ipm != NULL)
- continue; /* Address already in list. */
- ipm = kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC);
- if (ipm == NULL) {
- pr_info("Not enough memory to add"
- " new multicast entry!\n");
- break;
- }
- memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH);
- ipm->ipm.ip_addr = im4->multiaddr;
- ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED;
- spin_lock_irqsave(&card->ipm_lock, flags);
- LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4);
- list_add(&ipm->list, &card->ipm_list);
- spin_unlock_irqrestore(&card->ipm_lock, flags);
- }
-}
-
-static int
-lcs_register_mc_addresses(void *data)
-{
- struct lcs_card *card;
- struct in_device *in4_dev;
-
- card = (struct lcs_card *) data;
-
- if (!lcs_do_run_thread(card, LCS_SET_MC_THREAD))
- return 0;
- LCS_DBF_TEXT(4, trace, "regmulti");
-
- in4_dev = in_dev_get(card->dev);
- if (in4_dev == NULL)
- goto out;
- rcu_read_lock();
- lcs_remove_mc_addresses(card,in4_dev);
- lcs_set_mc_addresses(card, in4_dev);
- rcu_read_unlock();
- in_dev_put(in4_dev);
-
- netif_carrier_off(card->dev);
- netif_tx_disable(card->dev);
- wait_event(card->write.wait_q,
- (card->write.state != LCS_CH_STATE_RUNNING));
- lcs_fix_multicast_list(card);
- if (card->state == DEV_STATE_UP) {
- netif_carrier_on(card->dev);
- netif_wake_queue(card->dev);
- }
-out:
- lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD);
- return 0;
-}
-#endif /* CONFIG_IP_MULTICAST */
-
-/*
- * function called by net device to
- * handle multicast address relevant things
- */
-static void
-lcs_set_multicast_list(struct net_device *dev)
-{
-#ifdef CONFIG_IP_MULTICAST
- struct lcs_card *card;
-
- LCS_DBF_TEXT(4, trace, "setmulti");
- card = (struct lcs_card *) dev->ml_priv;
-
- if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD))
- schedule_work(&card->kernel_thread_starter);
-#endif /* CONFIG_IP_MULTICAST */
-}
-
-static long
-lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb)
-{
- if (!IS_ERR(irb))
- return 0;
-
- switch (PTR_ERR(irb)) {
- case -EIO:
- dev_warn(&cdev->dev,
- "An I/O-error occurred on the LCS device\n");
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT_(2, trace, " rc%d", -EIO);
- break;
- case -ETIMEDOUT:
- dev_warn(&cdev->dev,
- "A command timed out on the LCS device\n");
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT_(2, trace, " rc%d", -ETIMEDOUT);
- break;
- default:
- dev_warn(&cdev->dev,
- "An error occurred on the LCS device, rc=%ld\n",
- PTR_ERR(irb));
- LCS_DBF_TEXT(2, trace, "ckirberr");
- LCS_DBF_TEXT(2, trace, " rc???");
- }
- return PTR_ERR(irb);
-}
-
-static int
-lcs_get_problem(struct ccw_device *cdev, struct irb *irb)
-{
- int dstat, cstat;
- char *sense;
-
- sense = (char *) irb->ecw;
- cstat = irb->scsw.cmd.cstat;
- dstat = irb->scsw.cmd.dstat;
-
- if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK |
- SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK |
- SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) {
- LCS_DBF_TEXT(2, trace, "CGENCHK");
- return 1;
- }
- if (dstat & DEV_STAT_UNIT_CHECK) {
- if (sense[LCS_SENSE_BYTE_1] &
- LCS_SENSE_RESETTING_EVENT) {
- LCS_DBF_TEXT(2, trace, "REVIND");
- return 1;
- }
- if (sense[LCS_SENSE_BYTE_0] &
- LCS_SENSE_CMD_REJECT) {
- LCS_DBF_TEXT(2, trace, "CMDREJ");
- return 0;
- }
- if ((!sense[LCS_SENSE_BYTE_0]) &&
- (!sense[LCS_SENSE_BYTE_1]) &&
- (!sense[LCS_SENSE_BYTE_2]) &&
- (!sense[LCS_SENSE_BYTE_3])) {
- LCS_DBF_TEXT(2, trace, "ZEROSEN");
- return 0;
- }
- LCS_DBF_TEXT(2, trace, "DGENCHK");
- return 1;
- }
- return 0;
-}
-
-static void
-lcs_schedule_recovery(struct lcs_card *card)
-{
- LCS_DBF_TEXT(2, trace, "startrec");
- if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD))
- schedule_work(&card->kernel_thread_starter);
-}
-
-/*
- * IRQ Handler for LCS channels
- */
-static void
-lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
-{
- struct lcs_card *card;
- struct lcs_channel *channel;
- int rc, index;
- int cstat, dstat;
-
- if (lcs_check_irb_error(cdev, irb))
- return;
-
- card = CARD_FROM_DEV(cdev);
- if (card->read.ccwdev == cdev)
- channel = &card->read;
- else
- channel = &card->write;
-
- cstat = irb->scsw.cmd.cstat;
- dstat = irb->scsw.cmd.dstat;
- LCS_DBF_TEXT_(5, trace, "Rint%s", dev_name(&cdev->dev));
- LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.cstat,
- irb->scsw.cmd.dstat);
- LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.fctl,
- irb->scsw.cmd.actl);
-
- /* Check for channel and device errors presented */
- rc = lcs_get_problem(cdev, irb);
- if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) {
- dev_warn(&cdev->dev,
- "The LCS device stopped because of an error,"
- " dstat=0x%X, cstat=0x%X \n",
- dstat, cstat);
- if (rc) {
- channel->state = LCS_CH_STATE_ERROR;
- }
- }
- if (channel->state == LCS_CH_STATE_ERROR) {
- lcs_schedule_recovery(card);
- wake_up(&card->wait_q);
- return;
- }
- /* How far in the ccw chain have we processed? */
- if ((channel->state != LCS_CH_STATE_INIT) &&
- (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) &&
- (irb->scsw.cmd.cpa != 0)) {
- index = (struct ccw1 *)dma32_to_virt(irb->scsw.cmd.cpa)
- - channel->ccws;
- if ((irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) ||
- (irb->scsw.cmd.cstat & SCHN_STAT_PCI))
- /* Bloody io subsystem tells us lies about cpa... */
- index = (index - 1) & (LCS_NUM_BUFFS - 1);
- while (channel->io_idx != index) {
- __lcs_processed_buffer(channel,
- channel->iob + channel->io_idx);
- channel->io_idx =
- (channel->io_idx + 1) & (LCS_NUM_BUFFS - 1);
- }
- }
-
- if ((irb->scsw.cmd.dstat & DEV_STAT_DEV_END) ||
- (irb->scsw.cmd.dstat & DEV_STAT_CHN_END) ||
- (irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK))
- /* Mark channel as stopped. */
- channel->state = LCS_CH_STATE_STOPPED;
- else if (irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED)
- /* CCW execution stopped on a suspend bit. */
- channel->state = LCS_CH_STATE_SUSPENDED;
- if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) {
- if (irb->scsw.cmd.cc != 0) {
- ccw_device_halt(channel->ccwdev, 0);
- return;
- }
- /* The channel has been stopped by halt_IO. */
- channel->state = LCS_CH_STATE_HALTED;
- }
- if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
- channel->state = LCS_CH_STATE_CLEARED;
- /* Do the rest in the tasklet. */
- tasklet_schedule(&channel->irq_tasklet);
-}
-
-/*
- * Tasklet for IRQ handler
- */
-static void
-lcs_tasklet(unsigned long data)
-{
- unsigned long flags;
- struct lcs_channel *channel;
- struct lcs_buffer *iob;
- int buf_idx;
-
- channel = (struct lcs_channel *) data;
- LCS_DBF_TEXT_(5, trace, "tlet%s", dev_name(&channel->ccwdev->dev));
-
- /* Check for processed buffers. */
- iob = channel->iob;
- buf_idx = channel->buf_idx;
- while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) {
- /* Do the callback thing. */
- if (iob[buf_idx].callback != NULL)
- iob[buf_idx].callback(channel, iob + buf_idx);
- buf_idx = (buf_idx + 1) & (LCS_NUM_BUFFS - 1);
- }
- channel->buf_idx = buf_idx;
-
- if (channel->state == LCS_CH_STATE_STOPPED)
- lcs_start_channel(channel);
- spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
- if (channel->state == LCS_CH_STATE_SUSPENDED &&
- channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY)
- __lcs_resume_channel(channel);
- spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
-
- /* Something happened on the channel. Wake up waiters. */
- wake_up(&channel->wait_q);
-}
-
-/*
- * Finish current tx buffer and make it ready for transmit.
- */
-static void
-__lcs_emit_txbuffer(struct lcs_card *card)
-{
- LCS_DBF_TEXT(5, trace, "emittx");
- *(__u16 *)(card->tx_buffer->data + card->tx_buffer->count) = 0;
- card->tx_buffer->count += 2;
- lcs_ready_buffer(&card->write, card->tx_buffer);
- card->tx_buffer = NULL;
- card->tx_emitted++;
-}
-
-/*
- * Callback for finished tx buffers.
- */
-static void
-lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(5, trace, "txbuffcb");
- /* Put buffer back to pool. */
- lcs_release_buffer(channel, buffer);
- card = container_of(channel, struct lcs_card, write);
- if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev))
- netif_wake_queue(card->dev);
- spin_lock(&card->lock);
- card->tx_emitted--;
- if (card->tx_emitted <= 0 && card->tx_buffer != NULL)
- /*
- * Last running tx buffer has finished. Submit partially
- * filled current buffer.
- */
- __lcs_emit_txbuffer(card);
- spin_unlock(&card->lock);
-}
-
-/*
- * Packet transmit function called by network stack
- */
-static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
- struct net_device *dev)
-{
- struct lcs_header *header;
- int rc = NETDEV_TX_OK;
-
- LCS_DBF_TEXT(5, trace, "hardxmit");
- if (skb == NULL) {
- card->stats.tx_dropped++;
- card->stats.tx_errors++;
- return NETDEV_TX_OK;
- }
- if (card->state != DEV_STATE_UP) {
- dev_kfree_skb(skb);
- card->stats.tx_dropped++;
- card->stats.tx_errors++;
- card->stats.tx_carrier_errors++;
- return NETDEV_TX_OK;
- }
- if (skb->protocol == htons(ETH_P_IPV6)) {
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- netif_stop_queue(card->dev);
- spin_lock(&card->lock);
- if (card->tx_buffer != NULL &&
- card->tx_buffer->count + sizeof(struct lcs_header) +
- skb->len + sizeof(u16) > LCS_IOBUFFERSIZE)
- /* skb too big for current tx buffer. */
- __lcs_emit_txbuffer(card);
- if (card->tx_buffer == NULL) {
- /* Get new tx buffer */
- card->tx_buffer = lcs_get_buffer(&card->write);
- if (card->tx_buffer == NULL) {
- card->stats.tx_dropped++;
- rc = NETDEV_TX_BUSY;
- goto out;
- }
- card->tx_buffer->callback = lcs_txbuffer_cb;
- card->tx_buffer->count = 0;
- }
- header = (struct lcs_header *)
- (card->tx_buffer->data + card->tx_buffer->count);
- card->tx_buffer->count += skb->len + sizeof(struct lcs_header);
- header->offset = card->tx_buffer->count;
- header->type = card->lan_type;
- header->slot = card->portno;
- skb_copy_from_linear_data(skb, header + 1, skb->len);
- spin_unlock(&card->lock);
- card->stats.tx_bytes += skb->len;
- card->stats.tx_packets++;
- dev_kfree_skb(skb);
- netif_wake_queue(card->dev);
- spin_lock(&card->lock);
- if (card->tx_emitted <= 0 && card->tx_buffer != NULL)
- /* If this is the first tx buffer emit it immediately. */
- __lcs_emit_txbuffer(card);
-out:
- spin_unlock(&card->lock);
- return rc;
-}
-
-static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(5, trace, "pktxmit");
- card = (struct lcs_card *) dev->ml_priv;
- rc = __lcs_start_xmit(card, skb, dev);
- return rc;
-}
-
-/*
- * send startlan and lanstat command to make LCS device ready
- */
-static int
-lcs_startlan_auto(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(2, trace, "strtauto");
- card->lan_type = LCS_FRAME_TYPE_ENET;
- rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
- if (rc == 0)
- return 0;
-
- return -EIO;
-}
-
-static int
-lcs_startlan(struct lcs_card *card)
-{
- int rc, i;
-
- LCS_DBF_TEXT(2, trace, "startlan");
- rc = 0;
- if (card->portno != LCS_INVALID_PORT_NO) {
- if (card->lan_type == LCS_FRAME_TYPE_AUTO)
- rc = lcs_startlan_auto(card);
- else
- rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP);
- } else {
- for (i = 0; i <= 16; i++) {
- card->portno = i;
- if (card->lan_type != LCS_FRAME_TYPE_AUTO)
- rc = lcs_send_startlan(card,
- LCS_INITIATOR_TCPIP);
- else
- /* autodetecting lan type */
- rc = lcs_startlan_auto(card);
- if (rc == 0)
- break;
- }
- }
- if (rc == 0)
- return lcs_send_lanstat(card);
- return rc;
-}
-
-/*
- * LCS detect function
- * setup channels and make them I/O ready
- */
-static int
-lcs_detect(struct lcs_card *card)
-{
- int rc = 0;
-
- LCS_DBF_TEXT(2, setup, "lcsdetct");
- /* start/reset card */
- if (card->dev)
- netif_stop_queue(card->dev);
- rc = lcs_stop_channels(card);
- if (rc == 0) {
- rc = lcs_start_channels(card);
- if (rc == 0) {
- rc = lcs_send_startup(card, LCS_INITIATOR_TCPIP);
- if (rc == 0)
- rc = lcs_startlan(card);
- }
- }
- if (rc == 0) {
- card->state = DEV_STATE_UP;
- } else {
- card->state = DEV_STATE_DOWN;
- card->write.state = LCS_CH_STATE_INIT;
- card->read.state = LCS_CH_STATE_INIT;
- }
- return rc;
-}
-
-/*
- * LCS Stop card
- */
-static int
-lcs_stopcard(struct lcs_card *card)
-{
- int rc;
-
- LCS_DBF_TEXT(3, setup, "stopcard");
-
- if (card->read.state != LCS_CH_STATE_STOPPED &&
- card->write.state != LCS_CH_STATE_STOPPED &&
- card->read.state != LCS_CH_STATE_ERROR &&
- card->write.state != LCS_CH_STATE_ERROR &&
- card->state == DEV_STATE_UP) {
- lcs_clear_multicast_list(card);
- rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP);
- rc = lcs_send_shutdown(card);
- }
- rc = lcs_stop_channels(card);
- card->state = DEV_STATE_DOWN;
-
- return rc;
-}
-
-/*
- * Kernel Thread helper functions for LGW initiated commands
- */
-static void
-lcs_start_kernel_thread(struct work_struct *work)
-{
- struct lcs_card *card = container_of(work, struct lcs_card, kernel_thread_starter);
- LCS_DBF_TEXT(5, trace, "krnthrd");
- if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD))
- kthread_run(lcs_recovery, card, "lcs_recover");
-#ifdef CONFIG_IP_MULTICAST
- if (lcs_do_start_thread(card, LCS_SET_MC_THREAD))
- kthread_run(lcs_register_mc_addresses, card, "regipm");
-#endif
-}
-
-/*
- * Process control frames.
- */
-static void
-lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
-{
- LCS_DBF_TEXT(5, trace, "getctrl");
- if (cmd->initiator == LCS_INITIATOR_LGW) {
- switch(cmd->cmd_code) {
- case LCS_CMD_STARTUP:
- case LCS_CMD_STARTLAN:
- lcs_schedule_recovery(card);
- break;
- case LCS_CMD_STOPLAN:
- if (card->dev) {
- pr_warn("Stoplan for %s initiated by LGW\n",
- card->dev->name);
- netif_carrier_off(card->dev);
- }
- break;
- default:
- LCS_DBF_TEXT(5, trace, "noLGWcmd");
- break;
- }
- } else
- lcs_notify_lancmd_waiters(card, cmd);
-}
-
-/*
- * Unpack network packet.
- */
-static void
-lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
-{
- struct sk_buff *skb;
-
- LCS_DBF_TEXT(5, trace, "getskb");
- if (card->dev == NULL ||
- card->state != DEV_STATE_UP)
- /* The card isn't up. Ignore the packet. */
- return;
-
- skb = dev_alloc_skb(skb_len);
- if (skb == NULL) {
- dev_err(&card->dev->dev,
- " Allocating a socket buffer to interface %s failed\n",
- card->dev->name);
- card->stats.rx_dropped++;
- return;
- }
- skb_put_data(skb, skb_data, skb_len);
- skb->protocol = card->lan_type_trans(skb, card->dev);
- card->stats.rx_bytes += skb_len;
- card->stats.rx_packets++;
- if (skb->protocol == htons(ETH_P_802_2))
- *((__u32 *)skb->cb) = ++card->pkt_seq;
- netif_rx(skb);
-}
-
-/*
- * LCS main routine to get packets and lancmd replies from the buffers
- */
-static void
-lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
-{
- struct lcs_card *card;
- struct lcs_header *lcs_hdr;
- __u16 offset;
-
- LCS_DBF_TEXT(5, trace, "lcsgtpkt");
- lcs_hdr = (struct lcs_header *) buffer->data;
- if (lcs_hdr->offset == LCS_ILLEGAL_OFFSET) {
- LCS_DBF_TEXT(4, trace, "-eiogpkt");
- return;
- }
- card = container_of(channel, struct lcs_card, read);
- offset = 0;
- while (lcs_hdr->offset != 0) {
- if (lcs_hdr->offset <= 0 ||
- lcs_hdr->offset > LCS_IOBUFFERSIZE ||
- lcs_hdr->offset < offset) {
- /* Offset invalid. */
- card->stats.rx_length_errors++;
- card->stats.rx_errors++;
- return;
- }
- if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL)
- lcs_get_control(card, (struct lcs_cmd *) lcs_hdr);
- else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET)
- lcs_get_skb(card, (char *)(lcs_hdr + 1),
- lcs_hdr->offset - offset -
- sizeof(struct lcs_header));
- else
- dev_info_once(&card->dev->dev,
- "Unknown frame type %d\n",
- lcs_hdr->type);
- offset = lcs_hdr->offset;
- lcs_hdr->offset = LCS_ILLEGAL_OFFSET;
- lcs_hdr = (struct lcs_header *) (buffer->data + offset);
- }
- /* The buffer is now empty. Make it ready again. */
- lcs_ready_buffer(&card->read, buffer);
-}
-
-/*
- * get network statistics for ifconfig and other user programs
- */
-static struct net_device_stats *
-lcs_getstats(struct net_device *dev)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(4, trace, "netstats");
- card = (struct lcs_card *) dev->ml_priv;
- return &card->stats;
-}
-
-/*
- * stop lcs device
- * This function will be called by user doing ifconfig xxx down
- */
-static int
-lcs_stop_device(struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "stopdev");
- card = (struct lcs_card *) dev->ml_priv;
- netif_carrier_off(dev);
- netif_tx_disable(dev);
- dev->flags &= ~IFF_UP;
- wait_event(card->write.wait_q,
- (card->write.state != LCS_CH_STATE_RUNNING));
- rc = lcs_stopcard(card);
- if (rc)
- dev_err(&card->dev->dev,
- " Shutting down the LCS device failed\n");
- return rc;
-}
-
-/*
- * start lcs device and make it runnable
- * This function will be called by user doing ifconfig xxx up
- */
-static int
-lcs_open_device(struct net_device *dev)
-{
- struct lcs_card *card;
- int rc;
-
- LCS_DBF_TEXT(2, trace, "opendev");
- card = (struct lcs_card *) dev->ml_priv;
- /* initialize statistics */
- rc = lcs_detect(card);
- if (rc) {
- pr_err("Error in opening device!\n");
-
- } else {
- dev->flags |= IFF_UP;
- netif_carrier_on(dev);
- netif_wake_queue(dev);
- card->state = DEV_STATE_UP;
- }
- return rc;
-}
-
-/*
- * show function for portno called by cat or similar things
- */
-static ssize_t
-lcs_portno_show (struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- return sysfs_emit(buf, "%d\n", card->portno);
-}
-
-/*
- * store the value which is piped to file portno
- */
-static ssize_t
-lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
- struct lcs_card *card;
- int rc;
- s16 value;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- rc = kstrtos16(buf, 0, &value);
- if (rc)
- return -EINVAL;
- /* TODO: sanity checks */
- card->portno = value;
- if (card->dev)
- card->dev->dev_port = card->portno;
-
- return count;
-
-}
-
-static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store);
-
-static const char *lcs_type[] = {
- "not a channel",
- "2216 parallel",
- "2216 channel",
- "OSA LCS card",
- "unknown channel type",
- "unsupported channel type",
-};
-
-static ssize_t
-lcs_type_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct ccwgroup_device *cgdev;
-
- cgdev = to_ccwgroupdev(dev);
- if (!cgdev)
- return -ENODEV;
-
- return sysfs_emit(buf, "%s\n",
- lcs_type[cgdev->cdev[0]->id.driver_info]);
-}
-
-static DEVICE_ATTR(type, 0444, lcs_type_show, NULL);
-
-static ssize_t
-lcs_timeout_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(dev);
-
- return card ? sysfs_emit(buf, "%u\n", card->lancmd_timeout) : 0;
-}
-
-static ssize_t
-lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
- struct lcs_card *card;
- unsigned int value;
- int rc;
-
- card = dev_get_drvdata(dev);
-
- if (!card)
- return 0;
-
- rc = kstrtouint(buf, 0, &value);
- if (rc)
- return -EINVAL;
- /* TODO: sanity checks */
- card->lancmd_timeout = value;
-
- return count;
-
-}
-
-static DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store);
-
-static ssize_t
-lcs_dev_recover_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct lcs_card *card = dev_get_drvdata(dev);
- char *tmp;
- int i;
-
- if (!card)
- return -EINVAL;
- if (card->state != DEV_STATE_UP)
- return -EPERM;
- i = simple_strtoul(buf, &tmp, 16);
- if (i == 1)
- lcs_schedule_recovery(card);
- return count;
-}
-
-static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store);
-
-static struct attribute * lcs_attrs[] = {
- &dev_attr_portno.attr,
- &dev_attr_type.attr,
- &dev_attr_lancmd_timeout.attr,
- &dev_attr_recover.attr,
- NULL,
-};
-static struct attribute_group lcs_attr_group = {
- .attrs = lcs_attrs,
-};
-static const struct attribute_group *lcs_attr_groups[] = {
- &lcs_attr_group,
- NULL,
-};
-static const struct device_type lcs_devtype = {
- .name = "lcs",
- .groups = lcs_attr_groups,
-};
-
-/*
- * lcs_probe_device is called on establishing a new ccwgroup_device.
- */
-static int
-lcs_probe_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- if (!get_device(&ccwgdev->dev))
- return -ENODEV;
-
- LCS_DBF_TEXT(2, setup, "add_dev");
- card = lcs_alloc_card();
- if (!card) {
- LCS_DBF_TEXT_(2, setup, " rc%d", -ENOMEM);
- put_device(&ccwgdev->dev);
- return -ENOMEM;
- }
- dev_set_drvdata(&ccwgdev->dev, card);
- ccwgdev->cdev[0]->handler = lcs_irq;
- ccwgdev->cdev[1]->handler = lcs_irq;
- card->gdev = ccwgdev;
- INIT_WORK(&card->kernel_thread_starter, lcs_start_kernel_thread);
- card->thread_start_mask = 0;
- card->thread_allowed_mask = 0;
- card->thread_running_mask = 0;
- ccwgdev->dev.type = &lcs_devtype;
-
- return 0;
-}
-
-static int
-lcs_register_netdev(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- LCS_DBF_TEXT(2, setup, "regnetdv");
- card = dev_get_drvdata(&ccwgdev->dev);
- if (card->dev->reg_state != NETREG_UNINITIALIZED)
- return 0;
- SET_NETDEV_DEV(card->dev, &ccwgdev->dev);
- return register_netdev(card->dev);
-}
-
-/*
- * lcs_new_device will be called by setting the group device online.
- */
-static const struct net_device_ops lcs_netdev_ops = {
- .ndo_open = lcs_open_device,
- .ndo_stop = lcs_stop_device,
- .ndo_get_stats = lcs_getstats,
- .ndo_start_xmit = lcs_start_xmit,
-};
-
-static const struct net_device_ops lcs_mc_netdev_ops = {
- .ndo_open = lcs_open_device,
- .ndo_stop = lcs_stop_device,
- .ndo_get_stats = lcs_getstats,
- .ndo_start_xmit = lcs_start_xmit,
- .ndo_set_rx_mode = lcs_set_multicast_list,
-};
-
-static int
-lcs_new_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
- struct net_device *dev=NULL;
- enum lcs_dev_states recover_state;
- int rc;
-
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return -ENODEV;
-
- LCS_DBF_TEXT(2, setup, "newdev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- card->read.ccwdev = ccwgdev->cdev[0];
- card->write.ccwdev = ccwgdev->cdev[1];
-
- recover_state = card->state;
- rc = ccw_device_set_online(card->read.ccwdev);
- if (rc)
- goto out_err;
- rc = ccw_device_set_online(card->write.ccwdev);
- if (rc)
- goto out_werr;
-
- LCS_DBF_TEXT(3, setup, "lcsnewdv");
-
- lcs_setup_card(card);
- rc = lcs_detect(card);
- if (rc) {
- LCS_DBF_TEXT(2, setup, "dtctfail");
- dev_err(&ccwgdev->dev,
- "Detecting a network adapter for LCS devices"
- " failed with rc=%d (0x%x)\n", rc, rc);
- lcs_stopcard(card);
- goto out;
- }
- if (card->dev) {
- LCS_DBF_TEXT(2, setup, "samedev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- goto netdev_out;
- }
- switch (card->lan_type) {
- case LCS_FRAME_TYPE_ENET:
- card->lan_type_trans = eth_type_trans;
- dev = alloc_etherdev(0);
- break;
- default:
- LCS_DBF_TEXT(3, setup, "errinit");
- pr_err(" Initialization failed\n");
- goto out;
- }
- if (!dev)
- goto out;
- card->dev = dev;
- card->dev->ml_priv = card;
- card->dev->netdev_ops = &lcs_netdev_ops;
- card->dev->dev_port = card->portno;
- eth_hw_addr_set(card->dev, card->mac);
-#ifdef CONFIG_IP_MULTICAST
- if (!lcs_check_multicast_support(card))
- card->dev->netdev_ops = &lcs_mc_netdev_ops;
-#endif
-netdev_out:
- lcs_set_allowed_threads(card,0xffffffff);
- if (recover_state == DEV_STATE_RECOVER) {
- lcs_set_multicast_list(card->dev);
- card->dev->flags |= IFF_UP;
- netif_carrier_on(card->dev);
- netif_wake_queue(card->dev);
- card->state = DEV_STATE_UP;
- } else {
- lcs_stopcard(card);
- }
-
- if (lcs_register_netdev(ccwgdev) != 0)
- goto out;
-
- /* Print out supported assists: IPv6 */
- pr_info("LCS device %s %s IPv6 support\n", card->dev->name,
- (card->ip_assists_supported & LCS_IPASS_IPV6_SUPPORT) ?
- "with" : "without");
- /* Print out supported assist: Multicast */
- pr_info("LCS device %s %s Multicast support\n", card->dev->name,
- (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) ?
- "with" : "without");
- return 0;
-out:
-
- ccw_device_set_offline(card->write.ccwdev);
-out_werr:
- ccw_device_set_offline(card->read.ccwdev);
-out_err:
- return -ENODEV;
-}
-
-/*
- * lcs_shutdown_device, called when setting the group device offline.
- */
-static int
-__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode)
-{
- struct lcs_card *card;
- enum lcs_dev_states recover_state;
- int ret = 0, ret2 = 0, ret3 = 0;
-
- LCS_DBF_TEXT(3, setup, "shtdndev");
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return -ENODEV;
- if (recovery_mode == 0) {
- lcs_set_allowed_threads(card, 0);
- if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD))
- return -ERESTARTSYS;
- }
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- recover_state = card->state;
-
- ret = lcs_stop_device(card->dev);
- ret2 = ccw_device_set_offline(card->read.ccwdev);
- ret3 = ccw_device_set_offline(card->write.ccwdev);
- if (!ret)
- ret = (ret2) ? ret2 : ret3;
- if (ret)
- LCS_DBF_TEXT_(3, setup, "1err:%d", ret);
- if (recover_state == DEV_STATE_UP) {
- card->state = DEV_STATE_RECOVER;
- }
- return 0;
-}
-
-static int
-lcs_shutdown_device(struct ccwgroup_device *ccwgdev)
-{
- return __lcs_shutdown_device(ccwgdev, 0);
-}
-
-/*
- * drive lcs recovery after startup and startlan initiated by Lan Gateway
- */
-static int
-lcs_recovery(void *ptr)
-{
- struct lcs_card *card;
- struct ccwgroup_device *gdev;
- int rc;
-
- card = (struct lcs_card *) ptr;
-
- LCS_DBF_TEXT(4, trace, "recover1");
- if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD))
- return 0;
- LCS_DBF_TEXT(4, trace, "recover2");
- gdev = card->gdev;
- dev_warn(&gdev->dev,
- "A recovery process has been started for the LCS device\n");
- rc = __lcs_shutdown_device(gdev, 1);
- rc = lcs_new_device(gdev);
- if (!rc)
- pr_info("Device %s successfully recovered!\n",
- card->dev->name);
- else
- pr_info("Device %s could not be recovered!\n",
- card->dev->name);
- lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD);
- return 0;
-}
-
-/*
- * lcs_remove_device, free buffers and card
- */
-static void
-lcs_remove_device(struct ccwgroup_device *ccwgdev)
-{
- struct lcs_card *card;
-
- card = dev_get_drvdata(&ccwgdev->dev);
- if (!card)
- return;
-
- LCS_DBF_TEXT(3, setup, "remdev");
- LCS_DBF_HEX(3, setup, &card, sizeof(void*));
- if (ccwgdev->state == CCWGROUP_ONLINE) {
- lcs_shutdown_device(ccwgdev);
- }
- if (card->dev)
- unregister_netdev(card->dev);
- lcs_cleanup_card(card);
- lcs_free_card(card);
- dev_set_drvdata(&ccwgdev->dev, NULL);
- put_device(&ccwgdev->dev);
-}
-
-static struct ccw_device_id lcs_ids[] = {
- {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel},
- {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216},
- {CCW_DEVICE(0x3088, 0x60), .driver_info = lcs_channel_type_osa2},
- {},
-};
-MODULE_DEVICE_TABLE(ccw, lcs_ids);
-
-static struct ccw_driver lcs_ccw_driver = {
- .driver = {
- .owner = THIS_MODULE,
- .name = "lcs",
- },
- .ids = lcs_ids,
- .probe = ccwgroup_probe_ccwdev,
- .remove = ccwgroup_remove_ccwdev,
- .int_class = IRQIO_LCS,
-};
-
-/*
- * LCS ccwgroup driver registration
- */
-static struct ccwgroup_driver lcs_group_driver = {
- .driver = {
- .owner = THIS_MODULE,
- .name = "lcs",
- },
- .ccw_driver = &lcs_ccw_driver,
- .setup = lcs_probe_device,
- .remove = lcs_remove_device,
- .set_online = lcs_new_device,
- .set_offline = lcs_shutdown_device,
-};
-
-static ssize_t group_store(struct device_driver *ddrv, const char *buf,
- size_t count)
-{
- int err;
- err = ccwgroup_create_dev(lcs_root_dev, &lcs_group_driver, 2, buf);
- return err ? err : count;
-}
-static DRIVER_ATTR_WO(group);
-
-static struct attribute *lcs_drv_attrs[] = {
- &driver_attr_group.attr,
- NULL,
-};
-static struct attribute_group lcs_drv_attr_group = {
- .attrs = lcs_drv_attrs,
-};
-static const struct attribute_group *lcs_drv_attr_groups[] = {
- &lcs_drv_attr_group,
- NULL,
-};
-
-/*
- * LCS Module/Kernel initialization function
- */
-static int
-__init lcs_init_module(void)
-{
- int rc;
-
- pr_info("Loading %s\n", version);
- rc = lcs_register_debug_facility();
- LCS_DBF_TEXT(0, setup, "lcsinit");
- if (rc)
- goto out_err;
- lcs_root_dev = root_device_register("lcs");
- rc = PTR_ERR_OR_ZERO(lcs_root_dev);
- if (rc)
- goto register_err;
- rc = ccw_driver_register(&lcs_ccw_driver);
- if (rc)
- goto ccw_err;
- lcs_group_driver.driver.groups = lcs_drv_attr_groups;
- rc = ccwgroup_driver_register(&lcs_group_driver);
- if (rc)
- goto ccwgroup_err;
- return 0;
-
-ccwgroup_err:
- ccw_driver_unregister(&lcs_ccw_driver);
-ccw_err:
- root_device_unregister(lcs_root_dev);
-register_err:
- lcs_unregister_debug_facility();
-out_err:
- pr_err("Initializing the lcs device driver failed\n");
- return rc;
-}
-
-
-/*
- * LCS module cleanup function
- */
-static void
-__exit lcs_cleanup_module(void)
-{
- pr_info("Terminating lcs module.\n");
- LCS_DBF_TEXT(0, trace, "cleanup");
- ccwgroup_driver_unregister(&lcs_group_driver);
- ccw_driver_unregister(&lcs_ccw_driver);
- root_device_unregister(lcs_root_dev);
- lcs_unregister_debug_facility();
-}
-
-module_init(lcs_init_module);
-module_exit(lcs_cleanup_module);
-
-MODULE_AUTHOR("Frank Pavlic <fpavlic@de.ibm.com>");
-MODULE_DESCRIPTION("S/390 LAN channel station device driver");
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h
deleted file mode 100644
index a2699b70b050..000000000000
--- a/drivers/s390/net/lcs.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*lcs.h*/
-
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/refcount.h>
-#include <asm/ccwdev.h>
-
-#define LCS_DBF_TEXT(level, name, text) \
- do { \
- debug_text_event(lcs_dbf_##name, level, text); \
- } while (0)
-
-#define LCS_DBF_HEX(level,name,addr,len) \
-do { \
- debug_event(lcs_dbf_##name,level,(void*)(addr),len); \
-} while (0)
-
-#define LCS_DBF_TEXT_(level,name,text...) \
- do { \
- if (debug_level_enabled(lcs_dbf_##name, level)) { \
- scnprintf(debug_buffer, sizeof(debug_buffer), text); \
- debug_text_event(lcs_dbf_##name, level, debug_buffer); \
- } \
- } while (0)
-
-/**
- * sysfs related stuff
- */
-#define CARD_FROM_DEV(cdev) \
- (struct lcs_card *) dev_get_drvdata( \
- &((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev);
-
-/**
- * Enum for classifying detected devices.
- */
-enum lcs_channel_types {
- /* Device is not a channel */
- lcs_channel_type_none,
-
- /* Device is a 2216 channel */
- lcs_channel_type_parallel,
-
- /* Device is a 2216 channel */
- lcs_channel_type_2216,
-
- /* Device is a OSA2 card */
- lcs_channel_type_osa2
-};
-
-/**
- * CCW commands used in this driver
- */
-#define LCS_CCW_WRITE 0x01
-#define LCS_CCW_READ 0x02
-#define LCS_CCW_TRANSFER 0x08
-
-/**
- * LCS device status primitives
- */
-#define LCS_CMD_STARTLAN 0x01
-#define LCS_CMD_STOPLAN 0x02
-#define LCS_CMD_LANSTAT 0x04
-#define LCS_CMD_STARTUP 0x07
-#define LCS_CMD_SHUTDOWN 0x08
-#define LCS_CMD_QIPASSIST 0xb2
-#define LCS_CMD_SETIPM 0xb4
-#define LCS_CMD_DELIPM 0xb5
-
-#define LCS_INITIATOR_TCPIP 0x00
-#define LCS_INITIATOR_LGW 0x01
-#define LCS_STD_CMD_SIZE 16
-#define LCS_MULTICAST_CMD_SIZE 404
-
-/**
- * LCS IPASSIST MASKS,only used when multicast is switched on
- */
-/* Not supported by LCS */
-#define LCS_IPASS_ARP_PROCESSING 0x0001
-#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002
-#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004
-#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
-#define LCS_IPASS_IP_FILTERING 0x0010
-/* Supported by lcs 3172 */
-#define LCS_IPASS_IPV6_SUPPORT 0x0020
-#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
-
-/**
- * LCS sense byte definitions
- */
-#define LCS_SENSE_BYTE_0 0
-#define LCS_SENSE_BYTE_1 1
-#define LCS_SENSE_BYTE_2 2
-#define LCS_SENSE_BYTE_3 3
-#define LCS_SENSE_INTERFACE_DISCONNECT 0x01
-#define LCS_SENSE_EQUIPMENT_CHECK 0x10
-#define LCS_SENSE_BUS_OUT_CHECK 0x20
-#define LCS_SENSE_INTERVENTION_REQUIRED 0x40
-#define LCS_SENSE_CMD_REJECT 0x80
-#define LCS_SENSE_RESETTING_EVENT 0x80
-#define LCS_SENSE_DEVICE_ONLINE 0x20
-
-/**
- * LCS packet type definitions
- */
-#define LCS_FRAME_TYPE_CONTROL 0
-#define LCS_FRAME_TYPE_ENET 1
-#define LCS_FRAME_TYPE_TR 2
-#define LCS_FRAME_TYPE_FDDI 7
-#define LCS_FRAME_TYPE_AUTO -1
-
-/**
- * some more definitions,we will sort them later
- */
-#define LCS_ILLEGAL_OFFSET 0xffff
-#define LCS_IOBUFFERSIZE 0x5000
-#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */
-#define LCS_MAC_LENGTH 6
-#define LCS_INVALID_PORT_NO -1
-#define LCS_LANCMD_TIMEOUT_DEFAULT 5
-
-/**
- * Multicast state
- */
-#define LCS_IPM_STATE_SET_REQUIRED 0
-#define LCS_IPM_STATE_DEL_REQUIRED 1
-#define LCS_IPM_STATE_ON_CARD 2
-
-/**
- * LCS IP Assist declarations
- * seems to be only used for multicast
- */
-#define LCS_IPASS_ARP_PROCESSING 0x0001
-#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002
-#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004
-#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008
-#define LCS_IPASS_IP_FILTERING 0x0010
-#define LCS_IPASS_IPV6_SUPPORT 0x0020
-#define LCS_IPASS_MULTICAST_SUPPORT 0x0040
-
-/**
- * LCS Buffer states
- */
-enum lcs_buffer_states {
- LCS_BUF_STATE_EMPTY, /* buffer is empty */
- LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */
- LCS_BUF_STATE_READY, /* buffer is ready for read/write */
- LCS_BUF_STATE_PROCESSED,
-};
-
-/**
- * LCS Channel State Machine declarations
- */
-enum lcs_channel_states {
- LCS_CH_STATE_INIT,
- LCS_CH_STATE_HALTED,
- LCS_CH_STATE_STOPPED,
- LCS_CH_STATE_RUNNING,
- LCS_CH_STATE_SUSPENDED,
- LCS_CH_STATE_CLEARED,
- LCS_CH_STATE_ERROR,
-};
-
-/**
- * LCS device state machine
- */
-enum lcs_dev_states {
- DEV_STATE_DOWN,
- DEV_STATE_UP,
- DEV_STATE_RECOVER,
-};
-
-enum lcs_threads {
- LCS_SET_MC_THREAD = 1,
- LCS_RECOVERY_THREAD = 2,
-};
-
-/**
- * LCS struct declarations
- */
-struct lcs_header {
- __u16 offset;
- __u8 type;
- __u8 slot;
-} __attribute__ ((packed));
-
-struct lcs_ip_mac_pair {
- __be32 ip_addr;
- __u8 mac_addr[LCS_MAC_LENGTH];
- __u8 reserved[2];
-} __attribute__ ((packed));
-
-struct lcs_ipm_list {
- struct list_head list;
- struct lcs_ip_mac_pair ipm;
- __u8 ipm_state;
-};
-
-struct lcs_cmd {
- __u16 offset;
- __u8 type;
- __u8 slot;
- __u8 cmd_code;
- __u8 initiator;
- __u16 sequence_no;
- __u16 return_code;
- union {
- struct {
- __u8 lan_type;
- __u8 portno;
- __u16 parameter_count;
- __u8 operator_flags[3];
- __u8 reserved[3];
- } lcs_std_cmd;
- struct {
- __u16 unused1;
- __u16 buff_size;
- __u8 unused2[6];
- } lcs_startup;
- struct {
- __u8 lan_type;
- __u8 portno;
- __u8 unused[10];
- __u8 mac_addr[LCS_MAC_LENGTH];
- __u32 num_packets_deblocked;
- __u32 num_packets_blocked;
- __u32 num_packets_tx_on_lan;
- __u32 num_tx_errors_detected;
- __u32 num_tx_packets_disgarded;
- __u32 num_packets_rx_from_lan;
- __u32 num_rx_errors_detected;
- __u32 num_rx_discarded_nobuffs_avail;
- __u32 num_rx_packets_too_large;
- } lcs_lanstat_cmd;
-#ifdef CONFIG_IP_MULTICAST
- struct {
- __u8 lan_type;
- __u8 portno;
- __u16 num_ip_pairs;
- __u16 ip_assists_supported;
- __u16 ip_assists_enabled;
- __u16 version;
- struct {
- struct lcs_ip_mac_pair
- ip_mac_pair[32];
- __u32 response_data;
- } lcs_ipass_ctlmsg __attribute ((packed));
- } lcs_qipassist __attribute__ ((packed));
-#endif /*CONFIG_IP_MULTICAST */
- } cmd __attribute__ ((packed));
-} __attribute__ ((packed));
-
-/**
- * Forward declarations.
- */
-struct lcs_card;
-struct lcs_channel;
-
-/**
- * Definition of an lcs buffer.
- */
-struct lcs_buffer {
- enum lcs_buffer_states state;
- void *data;
- int count;
- /* Callback for completion notification. */
- void (*callback)(struct lcs_channel *, struct lcs_buffer *);
-};
-
-struct lcs_reply {
- struct list_head list;
- __u16 sequence_no;
- refcount_t refcnt;
- /* Callback for completion notification. */
- void (*callback)(struct lcs_card *, struct lcs_cmd *);
- wait_queue_head_t wait_q;
- struct lcs_card *card;
- struct timer_list timer;
- int received;
- int rc;
-};
-
-/**
- * Definition of an lcs channel
- */
-struct lcs_channel {
- enum lcs_channel_states state;
- struct ccw_device *ccwdev;
- struct ccw1 ccws[LCS_NUM_BUFFS + 1];
- wait_queue_head_t wait_q;
- struct tasklet_struct irq_tasklet;
- struct lcs_buffer iob[LCS_NUM_BUFFS];
- int io_idx;
- int buf_idx;
-};
-
-
-/**
- * definition of the lcs card
- */
-struct lcs_card {
- spinlock_t lock;
- spinlock_t ipm_lock;
- enum lcs_dev_states state;
- struct net_device *dev;
- struct net_device_stats stats;
- __be16 (*lan_type_trans)(struct sk_buff *skb,
- struct net_device *dev);
- struct ccwgroup_device *gdev;
- struct lcs_channel read;
- struct lcs_channel write;
- struct lcs_buffer *tx_buffer;
- int tx_emitted;
- struct list_head lancmd_waiters;
- int lancmd_timeout;
-
- struct work_struct kernel_thread_starter;
- spinlock_t mask_lock;
- unsigned long thread_start_mask;
- unsigned long thread_running_mask;
- unsigned long thread_allowed_mask;
- wait_queue_head_t wait_q;
-
-#ifdef CONFIG_IP_MULTICAST
- struct list_head ipm_list;
-#endif
- __u8 mac[LCS_MAC_LENGTH];
- __u16 ip_assists_supported;
- __u16 ip_assists_enabled;
- __s8 lan_type;
- __u32 pkt_seq;
- __u16 sequence_no;
- __s16 portno;
- /* Some info copied from probeinfo */
- u8 device_forced;
- u8 max_port_no;
- u8 hint_port_no;
- s16 port_protocol_no;
-} __attribute__ ((aligned(8)));
-
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 27f42f713c89..87edb7a8173b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
bool *vlan_offload_disabled);
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
struct _rule_hw *eth_header);
-int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index af86097641b0..46bd7550adf8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -54,7 +54,6 @@
#include <linux/mlx5/doorbell.h>
#include <linux/mlx5/eq.h>
#include <linux/timecounter.h>
-#include <linux/ptp_clock_kernel.h>
#include <net/devlink.h>
#define MLX5_ADEV_NAME "mlx5_core"
@@ -679,33 +678,8 @@ struct mlx5_rsvd_gids {
struct ida ida;
};
-#define MAX_PIN_NUM 8
-struct mlx5_pps {
- u8 pin_caps[MAX_PIN_NUM];
- struct work_struct out_work;
- u64 start[MAX_PIN_NUM];
- u8 enabled;
- u64 min_npps_period;
- u64 min_out_pulse_duration_ns;
-};
-
-struct mlx5_timer {
- struct cyclecounter cycles;
- struct timecounter tc;
- u32 nominal_c_mult;
- unsigned long overflow_period;
-};
-
-struct mlx5_clock {
- struct mlx5_nb pps_nb;
- seqlock_t lock;
- struct hwtstamp_config hwtstamp_config;
- struct ptp_clock *ptp;
- struct ptp_clock_info ptp_info;
- struct mlx5_pps pps_info;
- struct mlx5_timer timer;
-};
-
+struct mlx5_clock;
+struct mlx5_clock_dev_state;
struct mlx5_dm;
struct mlx5_fw_tracer;
struct mlx5_vxlan;
@@ -789,7 +763,8 @@ struct mlx5_core_dev {
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
- struct mlx5_clock clock;
+ struct mlx5_clock *clock;
+ struct mlx5_clock_dev_state *clock_state;
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
struct mlx5_rsc_dump *rsc_dump;
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index e68d42b8ce65..fd625e0dd869 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode {
MLX5E_100GAUI_1_100GBASE_CR_KR = 11,
MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12,
MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13,
+ MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14,
MLX5E_400GAUI_8_400GBASE_CR8 = 15,
MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16,
+ MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17,
MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19,
+ MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20,
MLX5E_EXT_LINK_MODES_NUMBER,
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..fccc03cd2164 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -658,6 +658,7 @@ struct netdev_queue {
struct Qdisc __rcu *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
+ const struct attribute_group **groups;
#endif
unsigned long tx_maxrate;
/*
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4bc2ee0b10b0..ccaaf4c7d5f6 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -43,6 +43,7 @@ extern void rtnl_lock(void);
extern void rtnl_unlock(void);
extern int rtnl_trylock(void);
extern int rtnl_is_locked(void);
+extern int rtnl_lock_interruptible(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 596836abf7bf..af40842f229d 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -16,6 +16,7 @@ struct netdev_rx_queue {
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
+ const struct attribute_group **groups;
struct net_device *dev;
netdevice_tracker dev_tracker;
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 1b58faa4f20f..c61d5b21e7b4 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -24,11 +24,20 @@ struct net_iov {
unsigned long __unused_padding;
unsigned long pp_magic;
struct page_pool *pp;
- struct dmabuf_genpool_chunk_owner *owner;
+ struct net_iov_area *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
+struct net_iov_area {
+ /* Array of net_iovs for this area. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+};
+
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
@@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
+static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
/* netmem */
/**
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
new file mode 100644
index 000000000000..b3e665897767
--- /dev/null
+++ b/include/net/page_pool/memory_provider.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H
+#define _NET_PAGE_POOL_MEMORY_PROVIDER_H
+
+#include <net/netmem.h>
+#include <net/page_pool/types.h>
+
+struct netdev_rx_queue;
+struct sk_buff;
+
+struct memory_provider_ops {
+ netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp);
+ bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem);
+ int (*init)(struct page_pool *pool);
+ void (*destroy)(struct page_pool *pool);
+ int (*nl_fill)(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq);
+ void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
+};
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
+void net_mp_niov_clear_page_pool(struct net_iov *niov);
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p);
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p);
+
+/**
+ * net_mp_netmem_place_in_cache() - give a netmem to a page pool
+ * @pool: the page pool to place the netmem into
+ * @netmem: netmem to give
+ *
+ * Push an accounted netmem into the page pool's allocation cache. The caller
+ * must ensure that there is space in the cache. It should only be called off
+ * the mp_ops->alloc_netmems() path.
+ */
+static inline void net_mp_netmem_place_in_cache(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ pool->alloc.cache[pool->alloc.count++] = netmem;
+}
+
+#endif
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 7f405672b089..36eb57d73abc 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -152,8 +152,11 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+struct memory_provider_ops;
+
struct pp_memory_provider_params {
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
};
struct page_pool {
@@ -216,6 +219,7 @@ struct page_pool {
struct ptr_ring ring;
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9b18c4cfe56f..2feba0929a8a 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100,
ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101,
ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102,
+ ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103,
+ ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104,
+ ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105,
+ ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106,
+ ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107,
+ ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108,
+ ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109,
+ ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110,
+ ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111,
+ ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112,
+ ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113,
+ ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114,
+ ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115,
+ ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116,
+ ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117,
+ ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118,
+ ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119,
+ ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120,
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index e4be227d3ad6..6c6ee183802d 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -87,6 +87,11 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
@@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -136,6 +142,7 @@ enum {
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 1a52a0bca086..7e1ad229e133 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
/* host join */
if (!port) {
- if (mp->host_joined) {
+ if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..d5ab9a4b318e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -159,6 +159,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/rps.h>
#include <linux/phy_link_topology.h>
@@ -6119,16 +6120,18 @@ EXPORT_SYMBOL(netif_receive_skb_list);
static void flush_backlog(struct work_struct *work)
{
struct sk_buff *skb, *tmp;
+ struct sk_buff_head list;
struct softnet_data *sd;
+ __skb_queue_head_init(&list);
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
- dev_kfree_skb_irq(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
@@ -6136,14 +6139,16 @@ static void flush_backlog(struct work_struct *work)
local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
+
+ __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY);
}
static bool flush_required(int cpu)
@@ -7071,6 +7076,9 @@ void __netif_napi_del_locked(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
+ /* Make sure NAPI is disabled (or was never enabled). */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+
if (napi->config) {
napi->index = -1;
napi->config = NULL;
@@ -11738,6 +11746,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
}
EXPORT_SYMBOL(unregister_netdevice_queue);
+static void dev_memory_provider_uninstall(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ struct netdev_rx_queue *rxq = &dev->_rx[i];
+ struct pp_memory_provider_params *p = &rxq->mp_params;
+
+ if (p->mp_ops && p->mp_ops->uninstall)
+ p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+ }
+}
+
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh)
{
@@ -11792,7 +11813,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
- dev_dmabuf_uninstall(dev);
+ dev_memory_provider_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 3bba3f018df0..7c6e0b5b6acb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -16,6 +16,7 @@
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <trace/events/page_pool.h>
#include "devmem.h"
@@ -27,20 +28,28 @@
/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+static const struct memory_provider_ops dmabuf_devmem_ops;
+
+bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return niov->pp->mp_ops == &dmabuf_devmem_ops;
+}
+
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
struct gen_pool_chunk *chunk,
void *not_used)
{
struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
- kvfree(owner->niovs);
+ kvfree(owner->area.niovs);
kfree(owner);
}
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct dmabuf_genpool_chunk_owner *owner;
+ owner = net_devmem_iov_to_chunk_owner(niov);
return owner->base_dma_addr +
((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
@@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
offset = dma_addr - owner->base_dma_addr;
index = offset / PAGE_SIZE;
- niov = &owner->niovs[index];
+ niov = &owner->area.niovs[index];
niov->pp_magic = 0;
niov->pp = NULL;
@@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
void net_devmem_free_dmabuf(struct net_iov *niov)
{
- struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
unsigned long dma_addr = net_devmem_get_dma_addr(niov);
if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
@@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
WARN_ON(rxq->mp_params.mp_priv != binding);
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
rxq_idx = get_netdev_rx_queue_index(rxq);
@@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
}
rxq = __netif_get_rx_queue(dev, rxq_idx);
- if (rxq->mp_params.mp_priv) {
+ if (rxq->mp_params.mp_ops) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
return -EEXIST;
}
@@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return err;
rxq->mp_params.mp_priv = binding;
+ rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
err = netdev_rx_queue_restart(dev, rxq_idx);
if (err)
@@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
err_xa_erase:
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
xa_erase(&binding->bound_rxqs, xa_idx);
return err;
@@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->base_virtual = virtual;
+ owner->area.base_virtual = virtual;
owner->base_dma_addr = dma_addr;
- owner->num_niovs = len / PAGE_SIZE;
+ owner->area.num_niovs = len / PAGE_SIZE;
owner->binding = binding;
err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
@@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->niovs = kvmalloc_array(owner->num_niovs,
- sizeof(*owner->niovs),
- GFP_KERNEL);
- if (!owner->niovs) {
+ owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
+ sizeof(*owner->area.niovs),
+ GFP_KERNEL);
+ if (!owner->area.niovs) {
err = -ENOMEM;
goto err_free_chunks;
}
- for (i = 0; i < owner->num_niovs; i++) {
- niov = &owner->niovs[i];
- niov->owner = owner;
+ for (i = 0; i < owner->area.num_niovs; i++) {
+ niov = &owner->area.niovs[i];
+ niov->owner = &owner->area;
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
net_devmem_get_dma_addr(niov));
}
@@ -313,26 +325,6 @@ err_put_dmabuf:
return ERR_PTR(err);
}
-void dev_dmabuf_uninstall(struct net_device *dev)
-{
- struct net_devmem_dmabuf_binding *binding;
- struct netdev_rx_queue *rxq;
- unsigned long xa_idx;
- unsigned int i;
-
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- binding = dev->_rx[i].mp_params.mp_priv;
- if (!binding)
- continue;
-
- xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
- if (rxq == &dev->_rx[i]) {
- xa_erase(&binding->bound_rxqs, xa_idx);
- break;
- }
- }
-}
-
/*** "Dmabuf devmem memory provider" ***/
int mp_dmabuf_devmem_init(struct page_pool *pool)
@@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
/* We don't want the page pool put_page()ing our net_iovs. */
return false;
}
+
+static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq)
+{
+ const struct net_devmem_dmabuf_binding *binding = mp_priv;
+ int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;
+
+ return nla_put_u32(rsp, type, binding->id);
+}
+
+static void mp_dmabuf_devmem_uninstall(void *mp_priv,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = mp_priv;
+ struct netdev_rx_queue *bound_rxq;
+ unsigned long xa_idx;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
+ if (bound_rxq == rxq) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+static const struct memory_provider_ops dmabuf_devmem_ops = {
+ .init = mp_dmabuf_devmem_init,
+ .destroy = mp_dmabuf_devmem_destroy,
+ .alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
+ .release_netmem = mp_dmabuf_devmem_release_page,
+ .nl_fill = mp_dmabuf_devmem_nl_fill,
+ .uninstall = mp_dmabuf_devmem_uninstall,
+};
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 76099ef9c482..7fc158d52729 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -10,6 +10,8 @@
#ifndef _NET_DEVMEM_H
#define _NET_DEVMEM_H
+#include <net/netmem.h>
+
struct netlink_ext_ack;
struct net_devmem_dmabuf_binding {
@@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding {
* allocations from this chunk.
*/
struct dmabuf_genpool_chunk_owner {
- /* Offset into the dma-buf where this chunk starts. */
- unsigned long base_virtual;
+ struct net_iov_area area;
+ struct net_devmem_dmabuf_binding *binding;
/* dma_addr of the start of the chunk. */
dma_addr_t base_dma_addr;
-
- /* Array of net_iovs for this chunk. */
- struct net_iov *niovs;
- size_t num_niovs;
-
- struct net_devmem_dmabuf_binding *binding;
};
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
@@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack);
-void dev_dmabuf_uninstall(struct net_device *dev);
static inline struct dmabuf_genpool_chunk_owner *
-net_iov_owner(const struct net_iov *niov)
+net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
{
- return niov->owner;
+ struct net_iov_area *owner = net_iov_owner(niov);
+
+ return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
}
-static inline unsigned int net_iov_idx(const struct net_iov *niov)
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_iov_binding(const struct net_iov *niov)
{
- return niov - net_iov_owner(niov)->niovs;
+ return net_devmem_iov_to_chunk_owner(niov)->binding;
}
-static inline struct net_devmem_dmabuf_binding *
-net_iov_binding(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
- return net_iov_owner(niov)->binding;
+ return net_devmem_iov_binding(niov)->id;
}
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct net_iov_area *owner = net_iov_owner(niov);
return owner->base_virtual +
((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
-{
- return net_iov_owner(niov)->binding->id;
-}
-
static inline void
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
{
@@ -123,6 +115,8 @@ struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
void net_devmem_free_dmabuf(struct net_iov *ppiov);
+bool net_is_devmem_iov(struct net_iov *niov);
+
#else
struct net_devmem_dmabuf_binding;
@@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return -EOPNOTSUPP;
}
-static inline void dev_dmabuf_uninstall(struct net_device *dev)
-{
-}
-
static inline struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
@@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
return 0;
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
return 0;
}
+
+static inline bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return false;
+}
#endif
#endif /* _NET_DEVMEM_H */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bd0251bd74a1..d8dd686b5287 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
return -ENOENT;
}
-static void neigh_parms_destroy(struct neigh_parms *parms);
-
static inline void neigh_parms_put(struct neigh_parms *parms)
{
if (refcount_dec_and_test(&parms->refcnt))
- neigh_parms_destroy(parms);
+ kfree(parms);
}
/*
@@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
EXPORT_SYMBOL(neigh_parms_release);
-static void neigh_parms_destroy(struct neigh_parms *parms)
-{
- kfree(parms);
-}
-
static struct lock_class_key neigh_table_proxy_queue_class;
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 07cb99b114bd..3fe2c521e574 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev)
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
+/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
+ * when unregistering a net device and accessing associated sysfs files. The
+ * potential deadlock is as follow:
+ *
+ * CPU 0 CPU 1
+ *
+ * rtnl_lock vfs_read
+ * unregister_netdevice_many kernfs_seq_start
+ * device_del / kobject_put kernfs_get_active (kn->active++)
+ * kernfs_drain sysfs_kf_seq_show
+ * wait_event( rtnl_lock
+ * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release
+ * -> waits on CPU 1 to decrease kn->active the rtnl lock.
+ *
+ * The historical fix was to use rtnl_trylock with restart_syscall to bail out
+ * of sysfs operations when the lock couldn't be taken. This fixed the above
+ * issue as it allowed CPU 1 to bail out of the ABBA situation.
+ *
+ * But it came with performances issues, as syscalls are being restarted in
+ * loops when there was contention on the rtnl lock, with huge slow downs in
+ * specific scenarios (e.g. lots of virtual interfaces created and userspace
+ * daemons querying their attributes).
+ *
+ * The idea below is to bail out of the active kernfs_node protection
+ * (kn->active) while trying to take the rtnl lock.
+ *
+ * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
+ * net device is guaranteed to be alive if this returns successfully.
+ */
+static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
+ struct net_device *ndev)
+{
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ /* First, we hold a reference to the net device as the unregistration
+ * path might run in parallel. This will ensure the net device and the
+ * associated sysfs objects won't be freed while we try to take the rtnl
+ * lock.
+ */
+ dev_hold(ndev);
+ /* sysfs_break_active_protection was introduced to allow self-removal of
+ * devices and their associated sysfs files by bailing out of the
+ * sysfs/kernfs protection. We do this here to allow the unregistration
+ * path to complete in parallel. The following takes a reference on the
+ * kobject and the kernfs_node being accessed.
+ *
+ * This works because we hold a reference onto the net device and the
+ * unregistration path will wait for us eventually in netdev_run_todo
+ * (outside an rtnl lock section).
+ */
+ kn = sysfs_break_active_protection(kobj, attr);
+ /* We can now try to take the rtnl lock. This can't deadlock us as the
+ * unregistration path is able to drain sysfs files (kernfs_node) thanks
+ * to the above dance.
+ */
+ if (rtnl_lock_interruptible()) {
+ ret = -ERESTARTSYS;
+ goto unbreak;
+ }
+ /* Check dismantle on the device hasn't started, otherwise deny the
+ * operation.
+ */
+ if (!dev_isalive(ndev)) {
+ rtnl_unlock();
+ ret = -ENODEV;
+ goto unbreak;
+ }
+ /* We are now sure the device dismantle hasn't started nor that it can
+ * start before we exit the locking section as we hold the rtnl lock.
+ * There's no need to keep unbreaking the sysfs protection nor to hold
+ * a net device reference from that point; that was only needed to take
+ * the rtnl lock.
+ */
+unbreak:
+ sysfs_unbreak_active_protection(kn);
+ dev_put(ndev);
+
+ return ret;
+}
+
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
@@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (ret)
goto err;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ goto err;
+
+ ret = (*set)(netdev, new);
+ if (ret == 0)
+ ret = len;
- if (dev_isalive(netdev)) {
- ret = (*set)(netdev, new);
- if (ret == 0)
- ret = len;
- }
rtnl_unlock();
err:
return ret;
@@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
/* The check is also done in change_carrier; this helps returning early
- * without hitting the trylock/restart in netdev_store.
+ * without hitting the locking section in netdev_store.
*/
if (!netdev->netdev_ops->ndo_change_carrier)
return -EOPNOTSUPP;
@@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
/* Synchronize carrier state with link watch,
@@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev,
ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
- rtnl_unlock();
+ rtnl_unlock();
return ret;
}
static DEVICE_ATTR_RW(carrier);
@@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- ret = dev_set_alias(netdev, buf, count);
- if (ret < 0)
- goto err;
- ret = len;
- netdev_state_change(netdev);
- }
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
err:
rtnl_unlock();
@@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid;
ssize_t ret = -EINVAL;
/* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the trylock/restart below.
+ * early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid;
+ ret = dev_get_phys_port_id(netdev, &ppid);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_phys_port_id(netdev, &ppid);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ char name[IFNAMSIZ];
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- char name[IFNAMSIZ];
+ ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+ if (!ret)
+ ret = sysfs_emit(buf, "%s\n", name);
- ret = dev_get_phys_port_name(netdev, name, sizeof(name));
- if (!ret)
- ret = sysfs_emit(buf, "%s\n", name);
- }
rtnl_unlock();
return ret;
@@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid = { };
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below. This works
+ * returning early without hitting the locking section below. This works
* because recurse is false when calling dev_get_port_parent_id.
*/
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid = { };
+ ret = dev_get_port_parent_id(netdev, &ppid, false);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_port_parent_id(netdev, &ppid, false);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
- .default_groups = rx_queue_default_groups,
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
@@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Rx queues are cleared in rx_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
@@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = rx_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
- goto err;
+ goto err_default_groups;
}
error = rx_queue_default_mask(dev, queue);
if (error)
- goto err;
+ goto err_default_groups;
kobject_uevent(kobj, KOBJ_ADD);
return error;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
- struct kobject *kobj = &dev->_rx[i].kobj;
+ struct netdev_rx_queue *queue = &dev->_rx[i];
+ struct kobject *kobj = &queue->kobj;
if (!refcount_read(&dev_net(dev)->ns.count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+ sysfs_remove_groups(kobj, queue->groups);
kobject_put(kobj);
}
@@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num,
*/
struct netdev_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_queue *queue,
- const char *buf, size_t len);
+ ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len);
};
#define to_netdev_queue_attr(_attr) \
container_of(_attr, struct netdev_queue_attribute, attr)
@@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
if (!attribute->show)
return -EIO;
- return attribute->show(queue, buf);
+ return attribute->show(kobj, attr, queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
@@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
if (!attribute->store)
return -EIO;
- return attribute->store(queue, buf, count);
+ return attribute->store(kobj, attr, queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
-static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
+static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
@@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
-static ssize_t traffic_class_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
- int num_tc, tc;
- int index;
+ int num_tc, tc, index, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
index = get_netdev_queue_index(queue);
@@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static ssize_t tx_maxrate_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}
-static ssize_t tx_maxrate_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
- struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
+ struct net_device *dev = queue->dev;
u32 rate = 0;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* The check is also done later; this helps returning early without
- * hitting the trylock/restart below.
+ * hitting the locking section below.
*/
if (!dev->netdev_ops->ndo_set_tx_maxrate)
return -EOPNOTSUPP;
@@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (err < 0)
return err;
- if (!rtnl_trylock())
- return restart_syscall();
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err)
+ return err;
err = -EOPNOTSUPP;
if (dev->netdev_ops->ndo_set_tx_maxrate)
err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
- rtnl_unlock();
if (!err) {
queue->tx_maxrate = rate;
+ rtnl_unlock();
return len;
}
+
+ rtnl_unlock();
return err;
}
@@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count,
return count;
}
-static ssize_t bql_show_hold_time(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}
-static ssize_t bql_set_hold_time(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
-static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}
-static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
-static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}
-static ssize_t bql_set_stall_max(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
WRITE_ONCE(queue->dql.stall_max, 0);
return len;
@@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
-static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
-static ssize_t bql_show_inflight(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
-static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
- char *buf) \
+static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
-static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
+static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
@@ -1613,19 +1733,21 @@ out_no_maps:
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int len, tc;
+ int len, tc, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
@@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
return -EINVAL;
}
- /* Make sure the subordinate device can't be freed */
- get_device(&dev->dev);
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
- put_device(&dev->dev);
+ dev_put(dev);
return len;
}
-static ssize_t xps_cpus_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct net_device *dev = queue->dev;
unsigned int index;
@@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
free_cpumask_var(mask);
- return restart_syscall();
+ return err;
}
err = netif_set_xps_queue(dev, mask, index);
@@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
= __ATTR_RW(xps_cpus);
-static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int tc;
+ int tc, ret;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, dev);
+ if (ret)
+ return ret;
tc = netdev_txq_to_tc(dev, index);
+
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
- if (tc < 0)
- return -EINVAL;
- return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
+ ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
+ dev_put(dev);
+ return ret;
}
-static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
@@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
bitmap_free(mask);
- return restart_syscall();
+ return err;
}
cpus_read_lock();
@@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
- .default_groups = netdev_queue_default_groups,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
@@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Tx queues are cleared in netdev_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
@@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = netdev_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (netdev_uses_bql(dev)) {
error = sysfs_create_group(kobj, &dql_group);
if (error)
- goto err;
+ goto err_default_groups;
}
kobject_uevent(kobj, KOBJ_ADD);
return 0;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (netdev_uses_bql(dev))
sysfs_remove_group(&queue->kobj, &dql_group);
+ sysfs_remove_groups(&queue->kobj, queue->groups);
kobject_put(&queue->kobj);
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 715f85c6b62e..5b459b4fef46 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,6 +10,7 @@
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
+#include <net/page_pool/memory_provider.h>
#include "dev.h"
#include "devmem.h"
@@ -368,7 +369,7 @@ static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding;
+ struct pp_memory_provider_params *params;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -385,15 +386,15 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
rxq = __netif_get_rx_queue(netdev, q_idx);
+
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
- binding = rxq->mp_params.mp_priv;
- if (binding &&
- nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ params = &rxq->mp_params;
+ if (params->mp_ops &&
+ params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
-
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index db82786fa0c4..db46880f37cc 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -3,6 +3,7 @@
#include <linux/netdevice.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/memory_provider.h>
#include "page_pool_priv.h"
@@ -80,3 +81,71 @@ err_free_new_mem:
return err;
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
+
+static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ struct netdev_rx_queue *rxq;
+ int ret;
+
+ if (ifq_idx >= dev->real_num_rx_queues)
+ return -EINVAL;
+ ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+ if (rxq->mp_params.mp_ops)
+ return -EEXIST;
+
+ rxq->mp_params = *p;
+ ret = netdev_rx_queue_restart(dev, ifq_idx);
+ if (ret) {
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ }
+ return ret;
+}
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __net_mp_open_rxq(dev, ifq_idx, p);
+ rtnl_unlock();
+ return ret;
+}
+
+static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ struct netdev_rx_queue *rxq;
+
+ if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+ return;
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+
+ /* Callers holding a netdev ref may get here after we already
+ * went thru shutdown via dev_memory_provider_uninstall().
+ */
+ if (dev->reg_state > NETREG_REGISTERED &&
+ !rxq->mp_params.mp_ops)
+ return;
+
+ if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
+ rxq->mp_params.mp_priv != old_p->mp_priv))
+ return;
+
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
+}
+
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ rtnl_lock();
+ __net_mp_close_rxq(dev, ifq_idx, old_p);
+ rtnl_unlock();
+}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f5e908c9e7ad..686bd4a117d9 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -13,6 +13,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -285,13 +286,19 @@ static int page_pool_init(struct page_pool *pool,
rxq = __netif_get_rx_queue(pool->slow.netdev,
pool->slow.queue_idx);
pool->mp_priv = rxq->mp_params.mp_priv;
+ pool->mp_ops = rxq->mp_params.mp_ops;
}
- if (pool->mp_priv) {
+ if (pool->mp_ops) {
if (!pool->dma_map || !pool->dma_sync)
return -EOPNOTSUPP;
- err = mp_dmabuf_devmem_init(pool);
+ if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) {
+ err = -EFAULT;
+ goto free_ptr_ring;
+ }
+
+ err = pool->mp_ops->init(pool);
if (err) {
pr_warn("%s() mem-provider init failed %d\n", __func__,
err);
@@ -587,8 +594,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
@@ -679,8 +686,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
bool put;
put = true;
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- put = mp_dmabuf_devmem_release_page(pool, netmem);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ put = pool->mp_ops->release_netmem(pool, netmem);
else
__page_pool_release_page_dma(pool, netmem);
@@ -1048,8 +1055,8 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
- if (pool->mp_priv) {
- mp_dmabuf_devmem_destroy(pool);
+ if (pool->mp_ops) {
+ pool->mp_ops->destroy(pool);
static_branch_dec(&page_pool_mem_providers);
}
@@ -1190,3 +1197,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr);
+}
+
+/* Associate a niov with a page pool. Should follow with a matching
+ * net_mp_niov_clear_page_pool()
+ */
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+}
+
+/* Disassociate a niov from a page pool. Should only be used in the
+ * ->release_netmem() path.
+ */
+void net_mp_niov_clear_page_pool(struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_clear_pp_info(netmem);
+}
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 6677e0c2e256..9d8a3d8597fa 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -8,9 +8,9 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
+#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
-#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -216,7 +216,6 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
unsigned int napi_id;
void *hdr;
@@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
- if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
goto err_cancel;
genlmsg_end(rsp, hdr);
@@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool)
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq)
{
- struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ void *binding = rxq->mp_params.mp_priv;
struct page_pool *pool;
struct hlist_node *n;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1e559fce918..abe1a461ea67 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -80,6 +80,11 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_interruptible(void)
+{
+ return mutex_lock_interruptible(&rtnl_mutex);
+}
+
int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index d88e9080643b..7149d07e90c6 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(10, T1S, Half),
__DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_NAME(10, T1BRR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR_2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, VR, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, VR2, Full),
+ __DEFINE_LINK_MODE_NAME(800000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full),
+ __DEFINE_LINK_MODE_NAME(800000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_CR4 4
#define __LINK_MODE_LANES_CR8 8
#define __LINK_MODE_LANES_DR 1
+#define __LINK_MODE_LANES_DR_2 1
#define __LINK_MODE_LANES_DR2 2
+#define __LINK_MODE_LANES_DR2_2 2
#define __LINK_MODE_LANES_DR4 4
+#define __LINK_MODE_LANES_DR4_2 4
#define __LINK_MODE_LANES_DR8 8
#define __LINK_MODE_LANES_KR 1
#define __LINK_MODE_LANES_KR2 2
@@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1L 1
#define __LINK_MODE_LANES_T1S 1
#define __LINK_MODE_LANES_T1S_P2MP 1
+#define __LINK_MODE_LANES_VR 1
+#define __LINK_MODE_LANES_VR2 2
+#define __LINK_MODE_LANES_VR4 4
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
#define __LINK_MODE_LANES_T1BRR 1
@@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, VR, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ed1b6b44faf8..c9f11a046c26 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
- data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
+
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d704bda6c41..7f43d31c9400 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2476,6 +2476,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
}
niov = skb_frag_net_iov(frag);
+ if (!net_is_devmem_iov(niov)) {
+ err = -ENODEV;
+ goto out;
+ }
+
end = start + skb_frag_size(frag);
copy = end - offset;
@@ -2494,7 +2499,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
/* Will perform the exchange later */
dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
- dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+ dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov);
offset += copy;
remaining_len -= copy;
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index e4be227d3ad6..6c6ee183802d 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -87,6 +87,11 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
@@ -94,6 +99,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -136,6 +142,7 @@ enum {
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 0712b5e82eb7..d027a07c1e2c 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -17,9 +17,11 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
- $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
+CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index c2eabc90dce8..b22082fd660e 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -100,7 +100,7 @@ class Type(SpecAttr):
if isinstance(value, int):
return value
if value in self.family.consts:
- raise Exception("Resolving family constants not implemented, yet")
+ return self.family.consts[value]["value"]
return limit_to_number(value)
def get_limit_str(self, limit, default=None, suffix=''):
@@ -110,6 +110,9 @@ class Type(SpecAttr):
if isinstance(value, int):
return str(value) + suffix
if value in self.family.consts:
+ const = self.family.consts[value]
+ if const.get('header'):
+ return c_upper(value)
return c_upper(f"{self.family['name']}-{value}")
return c_upper(value)
@@ -2549,6 +2552,9 @@ def render_uapi(family, cw):
defines = []
for const in family['definitions']:
+ if const.get('header'):
+ continue
+
if const['type'] != 'const':
cw.writes_defines(defines)
defines = []
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 137470bdee0c..c7f1c443f2af 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -7,6 +7,7 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \
TEST_PROGS := \
netcons_basic.sh \
+ netcons_fragmented_msg.sh \
netcons_overflow.sh \
ping.py \
queues.py \
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3acaba41ac7b..0c262b123fdd 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -110,6 +110,13 @@ function create_dynamic_target() {
echo 1 > "${NETCONS_PATH}"/enabled
}
+# Do not append the release to the header of the message
+function disable_release_append() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
function cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+# - Correct fragmentation of large messages
+# - Proper reassembly of fragments at the receiver
+# - Preservation of userdata across fragments
+# - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+ # header is everything before ;
+ local HEADER="${1}"
+ REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+ echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+ local MSGFILE="${1}"
+ # Extract the header, which is the very first thing that arrives in the
+ # first list.
+ HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+ HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+ # Remove the two headers from the received message
+ # This will return the message without any header, similarly to what
+ # was sent.
+ sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+# 13,468,514729715,-,ncfrag=0/1135;<message>
+# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+ # Discard the netconsole headers, and assemble the full message
+ RCVMSG=$(extract_msg "${1}")
+
+ # check for the main message
+ if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+ echo "Message body doesn't match." >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # check userdata
+ if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+ echo "message userdata doesn't match" >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+ # test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..8c1597ebc2d3 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -149,7 +149,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
- check_fail $? "Managed to replace $name host entry"
+ check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..180c5eca556f 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -740,6 +740,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
+ # The entry should age out when it only forwards traffic
+ $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index 12e7cae251be..e907c2751956 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
- $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+ GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \