From e9a21949b79414dda42a017855b288901c07e613 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 8 Oct 2014 09:48:36 +0300 Subject: mac80211: add extended channel switching capability if the driver supports CSA The Extended Channel Switching capability bit in the extended capabilities element must be set if the driver supports CSA on non-beaconing interfaces. Since this capability needs to be set during driver registration, the extended_capabilities global variable needs to be moved to the local structure so that it can be modified. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b1be39c76931..5fab17b382b5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1998,6 +1998,11 @@ enum ieee80211_tdls_actioncode { WLAN_TDLS_DISCOVERY_REQUEST = 10, }; +/* Extended Channel Switching capability to be set in the 1st byte of + * the @WLAN_EID_EXT_CAPABILITY information element + */ +#define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING BIT(2) + /* Interworking capabilities are set in 7th bit of 4th byte of the * @WLAN_EID_EXT_CAPABILITY information element */ -- cgit From 6b358aedced8180830727258718c3916bef3e249 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 22 Oct 2014 20:26:44 +0200 Subject: phy: marvell: Add support for 88E3016 FastEthernet PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marvell 88E3016 is a FastEthernet PHY that also can be found in Marvell Berlin SoCs as integrated PHY. Tested-by: Antoine Ténart Reviewed-by: Florian Fainelli Signed-off-by: Sebastian Hesselbarth Signed-off-by: David S. 
Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 8e9a029e093d..e6982ac3200d 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -16,6 +16,7 @@ #define MARVELL_PHY_ID_88E1318S 0x01410e90 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 +#define MARVELL_PHY_ID_88E3016 0x01410e60 /* struct phy_device dev_flags definitions */ #define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 -- cgit From e7de17abeda24d8acc316b2e07bd969d03099eea Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 22 Oct 2014 20:26:45 +0200 Subject: net: pxa168_eth: Provide phy_interface mode on platform_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PXA168 Ethernet IP support MII and RMII connection to its PHY. Currently, pxa168 platform_data does not provide a way to pass that and there is one user of pxa168 platform_data (mach-mmp/gplug). Given the pinctrl settings of gplug it uses RMII, so add and pass a corresponding phy_interface_t. Tested-by: Antoine Ténart Reviewed-by: Florian Fainelli Signed-off-by: Sebastian Hesselbarth Signed-off-by: David S. Miller --- include/linux/pxa168_eth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h index 18d75e795606..37c381120bc8 100644 --- a/include/linux/pxa168_eth.h +++ b/include/linux/pxa168_eth.h @@ -13,6 +13,7 @@ struct pxa168_eth_platform_data { */ int speed; /* 0, SPEED_10, SPEED_100 */ int duplex; /* DUPLEX_HALF or DUPLEX_FULL */ + phy_interface_t intf; /* * Override default RX/TX queue sizes if nonzero. 
-- cgit From b3020f0a35fc431f7acf3fba9a5b7376d79932e5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 25 Oct 2014 05:25:07 +0200 Subject: ieee802154: mac802154: remove FSF address This patch removes the FSF address in files which belong to ieee802154 and mac802154. Signed-off-by: Alexander Aring Cc: Alan Ott Signed-off-by: Marcel Holtmann --- include/linux/nl802154.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 20163b9a0eae..167342c2ce6b 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -12,10 +12,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * */ #ifndef NL802154_H -- cgit From 4ca24aca55fe1e2a61f3ffaac9015d9c45204729 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sat, 25 Oct 2014 09:41:04 +0200 Subject: ieee802154: move ieee802154 header This patch moves the ieee802154 header into include/linux instead of include/net. This is similar to wireless, which has the ieee80211 header inside of include/linux. 
Signed-off-by: Alexander Aring Cc: Alan Ott Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 189 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 include/linux/ieee802154.h (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h new file mode 100644 index 000000000000..2dfab2db103a --- /dev/null +++ b/include/linux/ieee802154.h @@ -0,0 +1,189 @@ +/* + * IEEE802.15.4-2003 specification + * + * Copyright (C) 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Pavel Smolenskiy + * Maxim Gorbachyov + * Maxim Osipov + * Dmitry Eremin-Solenikov + * Alexander Smirnov + */ + +#ifndef LINUX_IEEE802154_H +#define LINUX_IEEE802154_H + +#define IEEE802154_MTU 127 + +#define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ +#define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ +#define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ +#define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ + +#define IEEE802154_FC_TYPE_SHIFT 0 +#define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) +#define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) +#define IEEE802154_FC_SET_TYPE(v, x) do { \ + v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ + (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ + } while (0) + +#define IEEE802154_FC_SECEN_SHIFT 3 +#define IEEE802154_FC_SECEN (1 << IEEE802154_FC_SECEN_SHIFT) +#define IEEE802154_FC_FRPEND_SHIFT 4 +#define IEEE802154_FC_FRPEND (1 << 
IEEE802154_FC_FRPEND_SHIFT) +#define IEEE802154_FC_ACK_REQ_SHIFT 5 +#define IEEE802154_FC_ACK_REQ (1 << IEEE802154_FC_ACK_REQ_SHIFT) +#define IEEE802154_FC_INTRA_PAN_SHIFT 6 +#define IEEE802154_FC_INTRA_PAN (1 << IEEE802154_FC_INTRA_PAN_SHIFT) + +#define IEEE802154_FC_SAMODE_SHIFT 14 +#define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) +#define IEEE802154_FC_DAMODE_SHIFT 10 +#define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) + +#define IEEE802154_FC_VERSION_SHIFT 12 +#define IEEE802154_FC_VERSION_MASK (3 << IEEE802154_FC_VERSION_SHIFT) +#define IEEE802154_FC_VERSION(x) ((x & IEEE802154_FC_VERSION_MASK) >> IEEE802154_FC_VERSION_SHIFT) + +#define IEEE802154_FC_SAMODE(x) \ + (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) + +#define IEEE802154_FC_DAMODE(x) \ + (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) + +#define IEEE802154_SCF_SECLEVEL_MASK 7 +#define IEEE802154_SCF_SECLEVEL_SHIFT 0 +#define IEEE802154_SCF_SECLEVEL(x) (x & IEEE802154_SCF_SECLEVEL_MASK) +#define IEEE802154_SCF_KEY_ID_MODE_SHIFT 3 +#define IEEE802154_SCF_KEY_ID_MODE_MASK (3 << IEEE802154_SCF_KEY_ID_MODE_SHIFT) +#define IEEE802154_SCF_KEY_ID_MODE(x) \ + ((x & IEEE802154_SCF_KEY_ID_MODE_MASK) >> IEEE802154_SCF_KEY_ID_MODE_SHIFT) + +#define IEEE802154_SCF_KEY_IMPLICIT 0 +#define IEEE802154_SCF_KEY_INDEX 1 +#define IEEE802154_SCF_KEY_SHORT_INDEX 2 +#define IEEE802154_SCF_KEY_HW_INDEX 3 + +#define IEEE802154_SCF_SECLEVEL_NONE 0 +#define IEEE802154_SCF_SECLEVEL_MIC32 1 +#define IEEE802154_SCF_SECLEVEL_MIC64 2 +#define IEEE802154_SCF_SECLEVEL_MIC128 3 +#define IEEE802154_SCF_SECLEVEL_ENC 4 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC32 5 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC64 6 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC128 7 + +/* MAC footer size */ +#define IEEE802154_MFR_SIZE 2 /* 2 octets */ + +/* MAC's Command Frames Identifiers */ +#define IEEE802154_CMD_ASSOCIATION_REQ 0x01 +#define IEEE802154_CMD_ASSOCIATION_RESP 0x02 +#define 
IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 +#define IEEE802154_CMD_DATA_REQ 0x04 +#define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 +#define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 +#define IEEE802154_CMD_BEACON_REQ 0x07 +#define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 +#define IEEE802154_CMD_GTS_REQ 0x09 + +/* + * The return values of MAC operations + */ +enum { + /* + * The requested operation was completed successfully. + * For a transmission request, this value indicates + * a successful transmission. + */ + IEEE802154_SUCCESS = 0x0, + + /* The beacon was lost following a synchronization request. */ + IEEE802154_BEACON_LOSS = 0xe0, + /* + * A transmission could not take place due to activity on the + * channel, i.e., the CSMA-CA mechanism has failed. + */ + IEEE802154_CHNL_ACCESS_FAIL = 0xe1, + /* The GTS request has been denied by the PAN coordinator. */ + IEEE802154_DENINED = 0xe2, + /* The attempt to disable the transceiver has failed. */ + IEEE802154_DISABLE_TRX_FAIL = 0xe3, + /* + * The received frame induces a failed security check according to + * the security suite. + */ + IEEE802154_FAILED_SECURITY_CHECK = 0xe4, + /* + * The frame resulting from secure processing has a length that is + * greater than aMACMaxFrameSize. + */ + IEEE802154_FRAME_TOO_LONG = 0xe5, + /* + * The requested GTS transmission failed because the specified GTS + * either did not have a transmit GTS direction or was not defined. + */ + IEEE802154_INVALID_GTS = 0xe6, + /* + * A request to purge an MSDU from the transaction queue was made using + * an MSDU handle that was not found in the transaction table. + */ + IEEE802154_INVALID_HANDLE = 0xe7, + /* A parameter in the primitive is out of the valid range.*/ + IEEE802154_INVALID_PARAMETER = 0xe8, + /* No acknowledgment was received after aMaxFrameRetries. */ + IEEE802154_NO_ACK = 0xe9, + /* A scan operation failed to find any network beacons.*/ + IEEE802154_NO_BEACON = 0xea, + /* No response data were available following a request. 
*/ + IEEE802154_NO_DATA = 0xeb, + /* The operation failed because a short address was not allocated. */ + IEEE802154_NO_SHORT_ADDRESS = 0xec, + /* + * A receiver enable request was unsuccessful because it could not be + * completed within the CAP. + */ + IEEE802154_OUT_OF_CAP = 0xed, + /* + * A PAN identifier conflict has been detected and communicated to the + * PAN coordinator. + */ + IEEE802154_PANID_CONFLICT = 0xee, + /* A coordinator realignment command has been received. */ + IEEE802154_REALIGMENT = 0xef, + /* The transaction has expired and its information discarded. */ + IEEE802154_TRANSACTION_EXPIRED = 0xf0, + /* There is no capacity to store the transaction. */ + IEEE802154_TRANSACTION_OVERFLOW = 0xf1, + /* + * The transceiver was in the transmitter enabled state when the + * receiver was requested to be enabled. + */ + IEEE802154_TX_ACTIVE = 0xf2, + /* The appropriate key is not available in the ACL. */ + IEEE802154_UNAVAILABLE_KEY = 0xf3, + /* + * A SET/GET request was issued with the identifier of a PIB attribute + * that is not supported. + */ + IEEE802154_UNSUPPORTED_ATTR = 0xf4, + /* + * A request to perform a scan operation failed because the MLME was + * in the process of performing a previously initiated scan operation. + */ + IEEE802154_SCAN_IN_PROGRESS = 0xfc, +}; + + +#endif /* LINUX_IEEE802154_H */ -- cgit From fa491001e4edae5ed68a562b61ed729968a3ca1c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 27 Oct 2014 17:13:40 +0100 Subject: ieee802154: add valid psdu length helper This patch adds a generic valid psdu length check function helper. This is useful to check the length field after receiving. For example the at86rf231 doesn't filter invalid psdu length. Sometimes the CRC can also be correct. If we get the lqi value with an invalid frame length the kernel may crash because we dereference an invalid pointer in the receive buffer. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 2dfab2db103a..6e50a2a1d485 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -23,7 +23,10 @@ #ifndef LINUX_IEEE802154_H #define LINUX_IEEE802154_H +#include <linux/types.h> + #define IEEE802154_MTU 127 +#define IEEE802154_MIN_PSDU_LEN 5 #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ @@ -185,5 +188,13 @@ enum { IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; +/** + * ieee802154_is_valid_psdu_len - check if psdu len is valid + * @len: psdu len with (MHR + payload + MFR) + */ +static inline bool ieee802154_is_valid_psdu_len(const u8 len) +{ + return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU); +} #endif /* LINUX_IEEE802154_H */ -- cgit From a59dadbeeaf7d33f2e92dbf5a290965d6df64162 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 25 Oct 2014 17:19:33 +0200 Subject: ath9k: add support for endian swap of eeprom from platform data On some devices (especially little-endian ones), the flash EEPROM data has a different endian, which needs to be detected. Add a flag to the platform data to allow overriding that behavior Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- include/linux/ath9k_platform.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index a495a959e8a7..43501657bce9 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -31,6 +31,7 @@ struct ath9k_platform_data { u32 gpio_mask; u32 gpio_val; + bool endian_check; bool is_clk_25mhz; bool tx_gain_buffalo; -- cgit From 3468968ef766d7bb4ab29c0ef7ebd169a4ac2e96 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 25 Oct 2014 17:19:34 +0200 Subject: ath9k: allow disabling bands via platform data Some devices have multiple bands enables in the EEPROM data, even though they are only calibrated for one. Allow platform data to disable unsupported bands. Signed-off-by: Gabor Juhos Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/ath9k_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 43501657bce9..33eb274cd0e6 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -34,6 +34,8 @@ struct ath9k_platform_data { bool endian_check; bool is_clk_25mhz; bool tx_gain_buffalo; + bool disable_2ghz; + bool disable_5ghz; int (*get_mac_revision)(void); int (*external_reset)(void); -- cgit From 95f259ca3bf485a0c1f17d9024813d4aab485a23 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 25 Oct 2014 12:08:59 +0200 Subject: net: pxa168_eth: Fix providing of phy_interface mode on platform_data Do not add phy include to the board file but platform_data include instead. Signed-off-by: Sebastian Hesselbarth Signed-off-by: David S. 
Miller --- include/linux/pxa168_eth.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h index 37c381120bc8..e1ab6e86cdb3 100644 --- a/include/linux/pxa168_eth.h +++ b/include/linux/pxa168_eth.h @@ -4,6 +4,8 @@ #ifndef __LINUX_PXA168_ETH_H #define __LINUX_PXA168_ETH_H +#include <linux/phy.h> + struct pxa168_eth_platform_data { int port_number; int phy_addr; -- cgit From 32a173c7f9e9ec2b87142f67e1478cd20084a45b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 27 Oct 2014 11:37:35 +0200 Subject: net/mlx4_core: Introduce mlx4_get_module_info for cable module info reading Added new MAD_IFC command to read cable module info with attribute id (0xFF60). Update include/linux/mlx4/device.h with function declaration (mlx4_get_module_info) and the needed defines/enums for future use. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 37e4404d0227..73910daec317 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -379,6 +379,13 @@ enum { #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) +enum mlx4_module_id { + MLX4_MODULE_ID_SFP = 0x3, + MLX4_MODULE_ID_QSFP = 0xC, + MLX4_MODULE_ID_QSFP_PLUS = 0xD, + MLX4_MODULE_ID_QSFP28 = 0x11, +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -799,6 +806,26 @@ struct mlx4_init_port_param { u64 si_guid; }; +#define MAD_IFC_DATA_SZ 192 +/* MAD IFC Mailbox */ +struct mlx4_mad_ifc { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + 
__be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[MAD_IFC_DATA_SZ]; +} __packed; + #define mlx4_foreach_port(port, dev, type) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) @@ -1283,6 +1310,9 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, u64 iova, u64 size, int npages, int page_shift, struct mlx4_mpt_entry *mpt_entry); +int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, + u16 offset, u16 size, u8 *data); + /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) { -- cgit From adbc7ac5c15eb5e9d70393428345e72a1a897d6a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 27 Oct 2014 11:37:37 +0200 Subject: net/mlx4_core: Introduce ACCESS_REG CMD and eth_prot_ctrl dev cap Adding ACCESS REG mlx4 command and use it to implement Query method for PTYS (Port Type and Speed Register). Query and store eth_prot_ctrl dev cap. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- include/linux/mlx4/cmd.h | 2 ++ include/linux/mlx4/device.h | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 379c02648ab3..ff5f5deb3dcf 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -67,6 +67,8 @@ enum { MLX4_CMD_MAP_ICM_AUX = 0xffc, MLX4_CMD_UNMAP_ICM_AUX = 0xffb, MLX4_CMD_SET_ICM_SIZE = 0xffd, + MLX4_CMD_ACCESS_REG = 0x3b, + /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, MLX4_CMD_GET_OP_REQ = 0x59, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 73910daec317..181cd9fc90f2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,7 +186,8 @@ enum { MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10, MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11, MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 12, - MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13 + MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13, + MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14 }; enum { @@ -1319,4 +1320,41 @@ static inline bool mlx4_low_memory_profile(void) return is_kdump_kernel(); } +/* ACCESS REG commands */ +enum mlx4_access_reg_method { + MLX4_ACCESS_REG_QUERY = 0x1, + MLX4_ACCESS_REG_WRITE = 0x2, +}; + +/* ACCESS PTYS Reg command */ +enum mlx4_ptys_proto { + MLX4_PTYS_IB = 1<<0, + MLX4_PTYS_EN = 1<<2, +}; + +struct mlx4_ptys_reg { + u8 resrvd1; + u8 local_port; + u8 resrvd2; + u8 proto_mask; + __be32 resrvd3[2]; + __be32 eth_proto_cap; + __be16 ib_width_cap; + __be16 ib_speed_cap; + __be32 resrvd4; + __be32 eth_proto_admin; + __be16 ib_width_admin; + __be16 ib_speed_admin; + __be32 resrvd5; + __be32 eth_proto_oper; + __be16 ib_width_oper; + __be16 ib_speed_oper; + __be32 resrvd6; + __be32 eth_proto_lp_adv; +} __packed; + +int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, + enum mlx4_access_reg_method method, + struct mlx4_ptys_reg *ptys_reg); + #endif /* 
MLX4_DEVICE_H */ -- cgit From a53e3e8c1db547981e13d1ebf24a659bd4e87710 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 27 Oct 2014 11:37:38 +0200 Subject: net/mlx4_core: Add ethernet backplane autoneg device capability Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 181cd9fc90f2..e4c136ebe79b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -187,7 +187,8 @@ enum { MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11, MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 12, MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13, - MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14 + MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, + MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15 }; enum { -- cgit From dca145ffaa8d39ea1904491ac81b92b7049372c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Oct 2014 21:45:24 -0700 Subject: tcp: allow for bigger reordering level While testing upcoming Yaogong patch (converting out of order queue into an RB tree), I hit the max reordering level of linux TCP stack. Reordering level was limited to 127 for no good reason, and some network setups [1] can easily reach this limit and get limited throughput. Allow a new max limit of 300, and add a sysctl to allow admins to even allow bigger (or lower) values if needed. [1] Aggregation of links, per packet load balancing, fabrics not doing deep packet inspections, alternative TCP congestion modules... Signed-off-by: Eric Dumazet Cc: Yaogong Wang Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c2dee7deefa8..f566b8567892 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -204,10 +204,10 @@ struct tcp_sock { u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ - u8 reordering; /* Packet reordering metric. */ + u8 keepalive_probes; /* num of allowed keep alive probes */ + u32 reordering; /* Packet reordering metric. */ u32 snd_up; /* Urgent pointer */ - u8 keepalive_probes; /* num of allowed keep alive probes */ /* * Options received (usually on last packet, some only on SYN packets). */ -- cgit From 7fd2561e4ebdd070ebba6d3326c4c5b13942323f Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Tue, 28 Oct 2014 18:11:14 +0900 Subject: net: ipv6: Add a sysctl to make optimistic addresses useful candidates Add a sysctl that causes an interface's optimistic addresses to be considered equivalent to other non-deprecated addresses for source address selection purposes. Preferred addresses will still take precedence over optimistic addresses, subject to other ranking in the source address selection algorithm. This is useful where different interfaces are connected to different networks from different ISPs (e.g., a cell network and a home wifi network). The current behaviour complies with RFC 3484/6724, and it makes sense if the host has only one interface, or has multiple interfaces on the same network (same or cooperating administrative domain(s), but not in the multiple distinct networks case. For example, if a mobile device has an IPv6 address on an LTE network and then connects to IPv6-enabled wifi, while the wifi IPv6 address is undergoing DAD, IPv6 connections will try use the wifi default route with the LTE IPv6 address, and will get stuck until they time out. 
Also, because optimistic nodes can receive frames, issue an RTM_NEWADDR as soon as DAD starts (with the IFA_F_OPTIMISTIC flag appropriately set). A second RTM_NEWADDR is sent if DAD completes (the address flags have changed), otherwise an RTM_DELADDR is sent. Also: add an entry in ip-sysctl.txt for optimistic_dad. Signed-off-by: Erik Kline Acked-by: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ff560537dd61..7121a2e97ce2 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -42,6 +42,7 @@ struct ipv6_devconf { __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; + __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE __s32 mc_forwarding; -- cgit From bc9ad166e38ae1cdcb5323a8aa45dff834d68bfa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2014 18:05:13 -0700 Subject: net: introduce napi_schedule_irqoff() napi_schedule() can be called from any context and has to mask hard irqs. Add a variant that can only be called from hard interrupts handlers or when irqs are already masked. Many NIC drivers can use it from their hard IRQ handler instead of generic variant. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 74fd5d37f15a..c85e06512246 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -386,6 +386,7 @@ typedef enum rx_handler_result rx_handler_result_t; typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); void __napi_schedule(struct napi_struct *n); +void __napi_schedule_irqoff(struct napi_struct *n); static inline bool napi_disable_pending(struct napi_struct *n) { @@ -420,6 +421,18 @@ static inline void napi_schedule(struct napi_struct *n) __napi_schedule(n); } +/** + * napi_schedule_irqoff - schedule NAPI poll + * @n: napi context + * + * Variant of napi_schedule(), assuming hard irqs are masked. + */ +static inline void napi_schedule_irqoff(struct napi_struct *n) +{ + if (napi_schedule_prep(n)) + __napi_schedule_irqoff(n); +} + /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ static inline bool napi_reschedule(struct napi_struct *napi) { -- cgit From cb904b0a16305f9b2a98200cc6eb9dc3610278b0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 2 Nov 2014 04:18:45 +0100 Subject: ieee802154: add extended address validation helper This patch introduce an extended address validation helper to check if an extended address is valid or not. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 6e50a2a1d485..9bba5ca7f0ad 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -197,4 +197,18 @@ static inline bool ieee802154_is_valid_psdu_len(const u8 len) return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU); } +/** + * ieee802154_is_valid_extended_addr - check if extended addr is valid + * @addr: extended addr to check + */ +static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) +{ + /* These EUI-64 addresses are reserved by IEEE. 0xffffffffffffffff + * is used internally as extended to short address broadcast mapping. + * This is currently a workaround because neighbor discovery can't + * deal with short addresses types right now. + */ + return ((addr != 0x0000000000000000) || (addr != 0xffffffffffffffff)); +} + #endif /* LINUX_IEEE802154_H */ -- cgit From a4164eb4dd3f4f2a22f8bf7b26394e8384f3d9a2 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 2 Nov 2014 21:43:01 +0100 Subject: ieee802154: add missing ULL definition Running make C=2 occurs warning: constant 0xffffffffffffffff is so big it is unsigned long This patch fix this warning by adding a ULL to the constant definitions. Signed-off-by: Alexander Aring Reported-by: Marcel Holtmann Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 9bba5ca7f0ad..9da7c011fbba 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -208,7 +208,8 @@ static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) * This is currently a workaround because neighbor discovery can't * deal with short addresses types right now. 
*/ - return ((addr != 0x0000000000000000) || (addr != 0xffffffffffffffff)); + return ((addr != 0x0000000000000000ULL) || + (addr != 0xffffffffffffffffULL)); } #endif /* LINUX_IEEE802154_H */ -- cgit From c28bee84c5c49312befe1b442e1044ac2392d80d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 2 Nov 2014 21:43:02 +0100 Subject: ieee802154: fix byteorder issues This patch fix byteorder issues which occurs because we compare __le64 with an host byteorder value. Simple add a cpu_to_le64 to convert the host byteorder values to __le64. Signed-off-by: Alexander Aring Reported-by: Marcel Holtmann Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 9da7c011fbba..5d9e7459d94b 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -24,6 +24,7 @@ #define LINUX_IEEE802154_H #include <linux/types.h> +#include <asm/byteorder.h> #define IEEE802154_MTU 127 #define IEEE802154_MIN_PSDU_LEN 5 @@ -208,8 +209,8 @@ static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) * This is currently a workaround because neighbor discovery can't * deal with short addresses types right now. */ - return ((addr != 0x0000000000000000ULL) || - (addr != 0xffffffffffffffffULL)); + return ((addr != cpu_to_le64(0x0000000000000000ULL)) || + (addr != cpu_to_le64(0xffffffffffffffffULL))); } #endif /* LINUX_IEEE802154_H */ -- cgit From 4cdb1e2e3d3495423db558d3bb7ed11d66aabce7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Nov 2014 06:00:12 -0800 Subject: net: shrink struct softnet_data flow_limit in struct softnet_data is only read from local cpu and can be moved to fill a hole, reducing softnet_data size by 64 bytes on x86_64 While we are at it, move output_queue, output_queue_tailp and completion_queue, so that rx / tx paths touch a single cache line. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c85e06512246..5ed05bd764dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2329,10 +2329,7 @@ extern int netdev_flow_limit_table_len; * Incoming packets are placed on per-cpu queues */ struct softnet_data { - struct Qdisc *output_queue; - struct Qdisc **output_queue_tailp; struct list_head poll_list; - struct sk_buff *completion_queue; struct sk_buff_head process_queue; /* stats */ @@ -2340,10 +2337,17 @@ struct softnet_data { unsigned int time_squeeze; unsigned int cpu_collision; unsigned int received_rps; - #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; +#endif +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit __rcu *flow_limit; +#endif + struct Qdisc *output_queue; + struct Qdisc **output_queue_tailp; + struct sk_buff *completion_queue; +#ifdef CONFIG_RPS /* Elements below can be accessed between CPUs for RPS */ struct call_single_data csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; @@ -2355,9 +2359,6 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; -#ifdef CONFIG_NET_FLOW_LIMIT - struct sd_flow_limit __rcu *flow_limit; -#endif }; static inline void input_queue_head_incr(struct softnet_data *sd) -- cgit From d475c95b4bcff983ac76e8522bfd2d29bcc567d0 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 2 Nov 2014 16:26:17 +0200 Subject: net/mlx4_core: Add retrieval of CONFIG_DEV parameters Add code to issue CONFIG_DEV "get" firmware command. This command is used in order to obtain certain parameters used for supporting various RX checksumming options and vxlan UDP port. The GET operation is allowed for VFs too. Signed-off-by: Matan Barak Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx4/cmd.h | 29 +++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 ++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ff5f5deb3dcf..64d25941b329 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -199,6 +199,33 @@ enum { MLX4_CMD_NATIVE }; +/* + * MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP - + * Receive checksum value is reported in CQE also for non TCP/UDP packets. + * + * MLX4_RX_CSUM_MODE_L4 - + * L4_CSUM bit in CQE, which indicates whether or not L4 checksum + * was validated correctly, is supported. + * + * MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP - + * IP_OK CQE's field is supported also for non TCP/UDP IP packets. + * + * MLX4_RX_CSUM_MODE_MULTI_VLAN - + * Receive Checksum offload is supported for packets with more than 2 vlan headers. + */ +enum mlx4_rx_csum_mode { + MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP = 1UL << 0, + MLX4_RX_CSUM_MODE_L4 = 1UL << 1, + MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP = 1UL << 2, + MLX4_RX_CSUM_MODE_MULTI_VLAN = 1UL << 3 +}; + +struct mlx4_config_dev_params { + u16 vxlan_udp_dport; + u8 rx_csum_flags_port_1; + u8 rx_csum_flags_port_2; +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { @@ -250,6 +277,8 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); +int mlx4_config_dev_retrieval(struct mlx4_dev *dev, + struct mlx4_config_dev_params *params); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4c136ebe79b..5cc5eac47d1b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -188,7 +188,8 @@ enum 
{ MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 12, MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13, MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, - MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15 + MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, + MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16 }; enum { -- cgit From 56b174256b6936ec4c1ed8f3407109ac6929d3ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Nov 2014 08:19:53 -0800 Subject: net: add rbnode to struct sk_buff Yaogong replaces TCP out of order receive queue by an RB tree. As netem already does a private skb->{next/prev/tstamp} union with a 'struct rb_node', lets do this in a cleaner way. Signed-off-by: Eric Dumazet Cc: Yaogong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6c8b6f604e76..5ad9675b6fe1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -440,6 +441,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -504,15 +506,19 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, */ struct sk_buff { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; - union { - ktime_t tstamp; - struct skb_mstamp skb_mstamp; + struct { + /* These two members must be first. 
*/ + struct sk_buff *next; + struct sk_buff *prev; + + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; + }; + struct rb_node rbnode; /* used in netem & tcp stack */ }; - struct sock *sk; struct net_device *dev; -- cgit From 98a18b6ffc79baa69f4a0d1bae58faf2a8aef4c8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 2 Nov 2014 06:44:54 +0100 Subject: netdevice: add ieee802154_ptr to net_device This patch adds an ieee802154_ptr to the net_device structure. Furthermore, the 802.15.4 subsystem will introduce an nl802154 framework, which is similar to the nl80211 framework, and a wpan_dev structure. The wpan_dev structure will hold additional net_device attributes like address options which are 802.15.4 specific. In the upcoming nl802154 implementation we will introduce a NL802154_FLAG_NEED_WPAN_DEV like NL80211_FLAG_NEED_WDEV. For this flag an ieee802154_ptr in net_device is needed. Additionally, we can access the wpan_dev attributes in upper layers like IEEE 802.15.4 6LoWPAN easily. The current solution is a complicated callback interface and getting these values over the subif data structure in mac802154. Signed-off-by: Alexander Aring Acked-by: David S.
Miller Signed-off-by: Marcel Holtmann --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 74fd5d37f15a..c9bcf33efb47 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -57,6 +57,8 @@ struct device; struct phy_device; /* 802.11 specific */ struct wireless_dev; +/* 802.15.4 specific */ +struct wpan_dev; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1572,6 +1574,7 @@ struct net_device { struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; struct wireless_dev *ieee80211_ptr; + struct wpan_dev *ieee802154_ptr; /* * Cache lines mostly used on receive path (including eth_type_trans()) -- cgit From 0563921abf01a7a38b5f670c3de05dc0b0b8617d Mon Sep 17 00:00:00 2001 From: Eran Harary Date: Mon, 3 Nov 2014 20:06:47 +0200 Subject: ieee80211: add "max length of AMPDU" enum for VHT Maximum length of AMPDU that an STA can receive in VHT. length = 2 ^ (13 + max_ampdu_length_exp) - 1. Signed-off-by: Eran Harary Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5fab17b382b5..f65b5446d983 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1274,7 +1274,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT 2 /* - * Maximum length of AMPDU that the STA can receive. + * Maximum length of AMPDU that the STA can receive in high-throughput (HT). * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) */ enum ieee80211_max_ampdu_length_exp { @@ -1284,6 +1284,21 @@ enum ieee80211_max_ampdu_length_exp { IEEE80211_HT_MAX_AMPDU_64K = 3 }; +/* + * Maximum length of AMPDU that the STA can receive in VHT. 
+ * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) + */ +enum ieee80211_vht_max_ampdu_length_exp { + IEEE80211_VHT_MAX_AMPDU_8K = 0, + IEEE80211_VHT_MAX_AMPDU_16K = 1, + IEEE80211_VHT_MAX_AMPDU_32K = 2, + IEEE80211_VHT_MAX_AMPDU_64K = 3, + IEEE80211_VHT_MAX_AMPDU_128K = 4, + IEEE80211_VHT_MAX_AMPDU_256K = 5, + IEEE80211_VHT_MAX_AMPDU_512K = 6, + IEEE80211_VHT_MAX_AMPDU_1024K = 7 +}; + #define IEEE80211_HT_MAX_AMPDU_FACTOR 13 /* Minimum MPDU start spacing */ -- cgit From 0d8a52f933f817d0b62955a5a362fb7f2508f06c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Nov 2014 11:55:09 +0300 Subject: ieee802154: || vs && in ieee802154_is_valid_extended_addr() The ieee802154_is_valid_extended_addr() always returns true because there is a typo. The || should be &&. Neither 0x0000000000000000ULL nor 0xffffffffffffffffULL are valid addresses. Signed-off-by: Dan Carpenter Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 5d9e7459d94b..4c032863cd71 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -209,7 +209,7 @@ static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) * This is currently a workaround because neighbor discovery can't * deal with short addresses types right now. */ - return ((addr != cpu_to_le64(0x0000000000000000ULL)) || + return ((addr != cpu_to_le64(0x0000000000000000ULL)) && (addr != cpu_to_le64(0xffffffffffffffffULL))); } -- cgit From 1906bbbddbe085b19be2c21cd132335260f551c3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 5 Nov 2014 20:51:21 +0100 Subject: ieee802154: add IEEE802154_EXTENDED_ADDR_LEN This patch adds a new define for getting the length of an extended address. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 4c032863cd71..a907fe59b1d1 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -29,6 +29,8 @@ #define IEEE802154_MTU 127 #define IEEE802154_MIN_PSDU_LEN 5 +#define IEEE802154_EXTENDED_ADDR_LEN 8 + #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ #define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ -- cgit From 35d5a374a559a1ba9c6810739cf3ad1d672c2de2 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 5 Nov 2014 20:51:22 +0100 Subject: ieee802154: add ieee802154_random_extended_addr This patch adds a new function to generate a random IEEE 802.15.4 extended address. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index a907fe59b1d1..d043449a079d 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -24,6 +24,7 @@ #define LINUX_IEEE802154_H #include +#include #include #define IEEE802154_MTU 127 @@ -215,4 +216,17 @@ static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) (addr != cpu_to_le64(0xffffffffffffffffULL))); } +/** + * ieee802154_random_extended_addr - generates a random extended address + * @addr: extended addr pointer to place the random address + */ +static inline void ieee802154_random_extended_addr(__le64 *addr) +{ + get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); + + /* toggle some bit if we hit an invalid extended addr */ + if (!ieee802154_is_valid_extended_addr(*addr)) + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] ^= 0x01; +} + #endif /* LINUX_IEEE802154_H */ -- cgit From 
e585f23636370320bc2071ca5ba2744ae37c3e51 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 4 Nov 2014 09:06:54 -0800 Subject: udp: Changes to udp_offload to support remote checksum offload Add a new GSO type, SKB_GSO_TUNNEL_REMCSUM, which indicates remote checksum offload being done (in this case inner checksum must not be offloaded to the NIC). Added logic in __skb_udp_tunnel_segment to handle remote checksum offload case. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 4 +++- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index dcfdecbfa0b7..8c94b07e654a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -48,8 +48,9 @@ enum { NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ + NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... 
TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_MPLS_BIT, + NETIF_F_GSO_TUNNEL_REMCSUM_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ @@ -119,6 +120,7 @@ enum { #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) #define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS) +#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ed05bd764dc..4767f546d7c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3584,6 +3584,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5ad9675b6fe1..74ed34413969 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -373,6 +373,7 @@ enum { SKB_GSO_MPLS = 1 << 12, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, }; #if BITS_PER_LONG > 32 @@ -603,7 +604,8 @@ struct sk_buff { #endif __u8 ipvs_property:1; __u8 inner_protocol_type:1; - /* 4 or 6 bit hole */ + __u8 remcsum_offload:1; + /* 3 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -- cgit From 51f3d02b980a338cd291d2bc7629cdfb2568424b Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 5 Nov 2014 16:46:40 -0500 Subject: net: Add and use skb_copy_datagram_msg() helper. This encapsulates all of the skb_copy_datagram_iovec() callers with call argument signature "skb, offset, msghdr->msg_iov, length". When we move to iov_iters in the networking, the iov_iter object will sit in the msghdr. Having a helper like this means there will be less places to touch during that transformation. Based upon descriptions and patch from Al Viro. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 74ed34413969..39ec7530ae27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -2639,6 +2640,11 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to, int size); +static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, + struct msghdr *msg, int size) +{ + return skb_copy_datagram_iovec(from, offset, msg->msg_iov, size); +} int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov); int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, -- cgit From 25de4668d094f00e44a8f2428dd3c1a4ecfa0053 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 4 Nov 2014 10:59:47 -0800 Subject: ipv6: move INET6_MATCH() to include/net/inet6_hashtables.h It is only used in net/ipv6/inet6_hashtables.c. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7121a2e97ce2..c694e7baa621 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -317,14 +317,4 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ - -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ - net_eq(sock_net(__sk), (__net))) - #endif /* _IPV6_H */ -- cgit From e5a2c899957659cd1a9f789bc462f9c0b35f5150 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 5 Nov 2014 00:23:04 +0100 Subject: fast_hash: avoid indirect function calls By default the arch_fast_hash hashing function pointers are initialized to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get updated to the CRC32 ones. This dispatching scheme incurs a function pointer lookup and indirect call for every hashing operation. rhashtable as a user of arch_fast_hash e.g. stores pointers to hashing functions in its structure, too, causing two indirect branches per hashing operation. Using alternative_call we can get away with one of those indirect branches. Acked-by: Daniel Borkmann Cc: Thomas Graf Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/linux/hash.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index d0494c399392..6e8fb028848c 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr) return (u32)val; } -struct fast_hash_ops { - u32 (*hash)(const void *data, u32 len, u32 seed); - u32 (*hash2)(const u32 *data, u32 len, u32 seed); -}; - -/** - * arch_fast_hash - Caclulates a hash over a given buffer that can have - * arbitrary size. This function will eventually use an - * architecture-optimized hashing implementation if - * available, and trades off distribution for speed. - * - * @data: buffer to hash - * @len: length of buffer in bytes - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash(const void *data, u32 len, u32 seed); - -/** - * arch_fast_hash2 - Caclulates a hash over a given buffer that has a - * size that is of a multiple of 32bit words. This - * function will eventually use an architecture- - * optimized hashing implementation if available, - * and trades off distribution for speed. - * - * @data: buffer to hash (must be 32bit padded) - * @len: number of 32bit words - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed); - #endif /* _LINUX_HASH_H */ -- cgit From 59b93b41e7fa71138734a911b11b044340dd16bd Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 5 Nov 2014 15:27:48 -0800 Subject: net: Remove MPLS GSO feature. Device can export MPLS GSO support in dev->mpls_features same way it export vlan features in dev->vlan_features. So it is safe to remove NETIF_F_GSO_MPLS redundant flag. 
Signed-off-by: Pravin B Shelar --- include/linux/netdev_features.h | 5 +---- include/linux/netdevice.h | 1 - include/linux/skbuff.h | 4 +--- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 8c94b07e654a..8e30685affeb 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -47,7 +47,6 @@ enum { NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ - NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, @@ -119,7 +118,6 @@ enum { #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) -#define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) @@ -183,7 +181,6 @@ enum { NETIF_F_GSO_IPIP | \ NETIF_F_GSO_SIT | \ NETIF_F_GSO_UDP_TUNNEL | \ - NETIF_F_GSO_UDP_TUNNEL_CSUM | \ - NETIF_F_GSO_MPLS) + NETIF_F_GSO_UDP_TUNNEL_CSUM) #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4767f546d7c0..90ac95900a11 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3583,7 +3583,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); - 
BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39ec7530ae27..53f4f6c93356 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -372,9 +372,7 @@ enum { SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_MPLS = 1 << 12, - - SKB_GSO_TUNNEL_REMCSUM = 1 << 13, + SKB_GSO_TUNNEL_REMCSUM = 1 << 12, }; #if BITS_PER_LONG > 32 -- cgit From a8f820aa4066d2c97e75ecd1bbca8a7920b66f2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Nov 2014 21:22:22 +0800 Subject: inet: Add skb_copy_datagram_iter This patch adds skb_copy_datagram_iter, which is identical to skb_copy_datagram_iovec except that it operates on iov_iter instead of iovec. Eventually all users of skb_copy_datagram_iovec should switch over to iov_iter and then we can remove skb_copy_datagram_iovec. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 53f4f6c93356..933cfce7fcd9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -150,6 +150,7 @@ struct net_device; struct scatterlist; struct pipe_inode_info; +struct iov_iter; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -2653,6 +2654,8 @@ int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, const struct iovec *to, int to_offset, int size); +int skb_copy_datagram_iter(const struct sk_buff *from, int offset, + struct iov_iter *to, int size); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -- cgit From bfe1be38fcee0e13ad53175d0b530707c20f93ec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Nov 2014 21:22:26 +0800 Subject: net: Kill skb_copy_datagram_const_iovec Now that both macvtap and tun are using skb_copy_datagram_iter, we can kill the abomination that is skb_copy_datagram_const_iovec. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 933cfce7fcd9..103fbe8113f8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2651,9 +2651,6 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, int len); int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, int offset, size_t count); -int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, - const struct iovec *to, int to_offset, - int size); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -- cgit From 3b47d30396bae4f0bd1ff0dbcd7c4f5077e7df4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2014 21:09:44 -0800 Subject: net: gro: add a per device gro flush timer Tuning coalescing parameters on NIC can be really hard. Servers can handle both bulk and RPC like traffic, with conflicting goals : bulk flows want as big GRO packets as possible, RPC want minimal latencies. To reach big GRO packets on 10Gbe NIC, one can use : ethtool -C eth0 rx-usecs 4 rx-frames 44 But this penalizes rpc sessions, with an increase of latencies, up to 50% in some cases, as NICs generally do not force an interrupt when a packet with TCP Push flag is received. Some NICs do not have an absolute timer, only a timer rearmed for every incoming packet. This patch uses a different strategy : Let the GRO stack decide what to do, based on traffic pattern. Packets with Push flag won't be delayed. Packets without Push flag might be held in GRO engine, if we keep receiving data. This new mechanism is off by default, and shall be enabled by setting /sys/class/net/ethX/gro_flush_timeout to a value in nanoseconds. To fully enable this mechanism, drivers should use napi_complete_done() instead of napi_complete().
Tested: Ran 200 netperf TCP_STREAM from A to B (10Gbe mlx4 link, 8 RX queues) Without this feature, we send back about 305,000 ACK per second. GRO aggregation ratio is low (811/305 = 2.65 segments per GRO packet) Setting a timer of 2000 nsec is enough to increase GRO packet sizes and reduce number of ACK packets. (811/19.2 = 42) Receiver performs less calls to upper stacks, less wakes up. This also reduces cpu usage on the sender, as it receives less ACK packets. Note that reducing number of wakes up increases cpu efficiency, but can decrease QPS, as applications wont have the chance to warmup cpu caches doing a partial read of RPC requests/answers if they fit in one skb. B:~# sar -n DEV 1 10 | grep eth0 | tail -1 Average: eth0 811269.80 305732.30 1199462.57 19705.72 0.00 0.00 0.50 B:~# echo 2000 >/sys/class/net/eth0/gro_flush_timeout B:~# sar -n DEV 1 10 | grep eth0 | tail -1 Average: eth0 811577.30 19230.80 1199916.51 1239.80 0.00 0.00 0.50 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 90ac95900a11..888d5513fa4a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,6 +314,7 @@ struct napi_struct { struct net_device *dev; struct sk_buff *gro_list; struct sk_buff *skb; + struct hrtimer timer; struct list_head dev_list; struct hlist_node napi_hash_node; unsigned int napi_id; @@ -443,14 +444,19 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } +void __napi_complete(struct napi_struct *n); +void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete * @n: napi context * * Mark NAPI processing as complete. + * Consider using napi_complete_done() instead. 
*/ -void __napi_complete(struct napi_struct *n); -void napi_complete(struct napi_struct *n); +static inline void napi_complete(struct napi_struct *n) +{ + return napi_complete_done(n, 0); +} /** * napi_by_id - lookup a NAPI by napi_id @@ -485,14 +491,7 @@ void napi_hash_del(struct napi_struct *napi); * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. */ -static inline void napi_disable(struct napi_struct *n) -{ - might_sleep(); - set_bit(NAPI_STATE_DISABLE, &n->state); - while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); - clear_bit(NAPI_STATE_DISABLE, &n->state); -} +void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling @@ -1603,6 +1602,7 @@ struct net_device { #endif + unsigned long gro_flush_timeout; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; -- cgit From f8c6455bb04b944edb69e9b074e28efee2c56bdd Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Sun, 9 Nov 2014 13:51:53 +0200 Subject: net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE When processing received traffic, pass CHECKSUM_COMPLETE status to the stack, with calculated checksum for non TCP/UDP packets (such as GRE or ICMP). Although the stack expects checksum which doesn't include the pseudo header, the HW adds it. To address that, we are subtracting the pseudo header checksum from the checksum value provided by the HW. In the IPv6 case, we also compute/add the IP header checksum which is not added by the HW for such packets. Cc: Jerry Chu Signed-off-by: Shani Michaeli Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5cc5eac47d1b..3d9bff00f24a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -497,6 +497,7 @@ struct mlx4_caps { u16 hca_core_clock; u64 phys_port_id[MLX4_MAX_PORTS + 1]; int tunnel_offload_mode; + u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { -- cgit From 09626e9d153326ca82568e4e27f2daa53713992e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 11 Nov 2014 13:29:42 -0800 Subject: net: kill netif_copy_real_num_queues() vlan was the only user of netif_copy_real_num_queues(), but it no longer calls it after commit 4af429d29b341bb1735f04c2fb960178 ("vlan: lockless transmit path"). So we can just remove it. Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 888d5513fa4a..4a6f770377d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2762,23 +2762,6 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, } #endif -static inline int netif_copy_real_num_queues(struct net_device *to_dev, - const struct net_device *from_dev) -{ - int err; - - err = netif_set_real_num_tx_queues(to_dev, - from_dev->real_num_tx_queues); - if (err) - return err; -#ifdef CONFIG_SYSFS - return netif_set_real_num_rx_queues(to_dev, - from_dev->real_num_rx_queues); -#else - return 0; -#endif -} - #ifdef CONFIG_SYSFS static inline unsigned int get_netdev_rx_queue_index( struct netdev_rx_queue *queue) -- cgit From 85eb92e81801d64686eb78928d500a4c83ee9623 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 1 Nov 2014 16:54:55 +0100 Subject: bcma: make it possible to specify a IRQ num in 
bcma_core_irq() This moves bcma_core_irq() to main.c and adds an extra parameter with a number so that we can return a different irq number for devices with more than one. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- include/linux/bcma/bcma.h | 2 ++ include/linux/bcma/bcma_driver_mips.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 729f48e6b20b..eb1c6a47b67f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -447,4 +447,6 @@ extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset); #define BCMA_DMA_TRANSLATION_DMA64_CMT 0x80000000 /* Client Mode Translation for 64-bit DMA */ extern u32 bcma_core_dma_translation(struct bcma_device *core); +extern unsigned int bcma_core_irq(struct bcma_device *core, int num); + #endif /* LINUX_BCMA_H_ */ diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index fb61f3fb4ddb..0b3b32aeeb8a 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -43,12 +43,12 @@ struct bcma_drv_mips { extern void bcma_core_mips_init(struct bcma_drv_mips *mcore); extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); -extern unsigned int bcma_core_irq(struct bcma_device *core); +extern unsigned int bcma_core_mips_irq(struct bcma_device *dev); #else static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { } static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } -static inline unsigned int bcma_core_irq(struct bcma_device *core) +static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev) { return 0; } -- cgit From 702bf371282f5912fe53f0b247fa2d7df9d7951f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Nov 2014 03:36:57 +0100 Subject: ieee802154: add pan_id setting support This patch adds support for setting pan_id via the nl802154 framework.
Adding a comment because setting 0xffff as pan_id seems to be valid setting. The pan_id 0xffff as source pan is invalid. I am not sure now about this setting but for the current netlink interface this is an invalid setting, so we do the same now. Maybe we need to change that when we have coordinator support and association support. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index d043449a079d..d40379876b84 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -30,6 +30,8 @@ #define IEEE802154_MTU 127 #define IEEE802154_MIN_PSDU_LEN 5 +#define IEEE802154_PAN_ID_BROADCAST 0xffff + #define IEEE802154_EXTENDED_ADDR_LEN 8 #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ -- cgit From 9830c62a0b3d57d9d00880989cfe987f581bc03f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Nov 2014 03:36:58 +0100 Subject: ieee820154: add short_addr setting support This patch adds support for setting short address via nl802154 framework. Also added a comment because a 0xffff seems to be valid address that we don't have a short address. This is a valid setting but we need more checks in upper layers to don't allow this address as source address. Also the current netlink interface doesn't allow to set the short_addr to 0xffff. Same for the 0xfffe short address which describes a not allocated short address. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index d40379876b84..ce0f96a55976 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,8 @@ #define IEEE802154_MIN_PSDU_LEN 5 #define IEEE802154_PAN_ID_BROADCAST 0xffff +#define IEEE802154_ADDR_SHORT_BROADCAST 0xffff +#define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 -- cgit From 71dfda58aaaf4bf6b1bc59f9d8afa635fa1337d4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 11 Nov 2014 09:26:34 -0800 Subject: net: Add device Rx page allocation function This patch implements __dev_alloc_pages and __dev_alloc_page. These are meant to replace the __skb_alloc_pages and __skb_alloc_page functions. The reason for doing this is that it occurred to me that __skb_alloc_page is supposed to be passed an sk_buff pointer, but it is NULL in all cases where it is used. Worse is that in the case of ixgbe it is passed NULL via the sk_buff pointer in the rx_buffer info structure which means the compiler is not correctly stripping it out. The naming for these functions is based on dev_alloc_skb and __dev_alloc_skb. There was originally a netdev_alloc_page, however that was passed a net_device pointer and this function is not so I thought it best to follow that naming scheme since that is the same difference between dev_alloc_skb and netdev_alloc_skb. In the case of anything greater than order 0 it is assumed that we want a compound page so __GFP_COMP is set for all allocations as we expect a compound page when assigning a page frag. The other change in this patch is to exploit the behaviors of the page allocator in how it handles flags. So for example we can always set __GFP_COMP and __GFP_MEMALLOC since they are ignored if they are not applicable or are overridden by another flag. 
Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 103fbe8113f8..2e5221f1ec72 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2184,6 +2184,54 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +/** + * __dev_alloc_pages - allocate page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx + * @order: size of the allocation + * + * Allocate a new page. + * + * %NULL is returned if there is no free memory. +*/ +static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, + unsigned int order) +{ + /* This piece of code contains several assumptions. + * 1. This is for device Rx, therefor a cold page is preferred. + * 2. The expectation is the user wants a compound page. + * 3. If requesting a order 0 page it will not be compound + * due to the check to see if order has a value in prep_new_page + * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to + * code in gfp_to_alloc_flags that should be enforcing this. + */ + gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; + + return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); +} + +static inline struct page *dev_alloc_pages(unsigned int order) +{ + return __dev_alloc_pages(GFP_ATOMIC, order); +} + +/** + * __dev_alloc_page - allocate a page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx + * + * Allocate a new page. + * + * %NULL is returned if there is no free memory. 
+ */ +static inline struct page *__dev_alloc_page(gfp_t gfp_mask) +{ + return __dev_alloc_pages(gfp_mask, 0); +} + +static inline struct page *dev_alloc_page(void) +{ + return __dev_alloc_page(GFP_ATOMIC); +} + /** * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX -- cgit From 160d2aba550b23c6a538158511d5adccc400f04c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 11 Nov 2014 09:27:05 -0800 Subject: net: Remove __skb_alloc_page and __skb_alloc_pages Remove the two functions which are now dead code. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e5221f1ec72..73c370e615de 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2232,49 +2232,6 @@ static inline struct page *dev_alloc_page(void) return __dev_alloc_page(GFP_ATOMIC); } -/** - * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used - * @order: size of the allocation - * - * Allocate a new page. - * - * %NULL is returned if there is no free memory. -*/ -static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, - struct sk_buff *skb, - unsigned int order) -{ - struct page *page; - - gfp_mask |= __GFP_COLD; - - if (!(gfp_mask & __GFP_NOMEMALLOC)) - gfp_mask |= __GFP_MEMALLOC; - - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - if (skb && page && page->pfmemalloc) - skb->pfmemalloc = true; - - return page; -} - -/** - * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. 
Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used - * - * Allocate a new page. - * - * %NULL is returned if there is no free memory. - */ -static inline struct page *__skb_alloc_page(gfp_t gfp_mask, - struct sk_buff *skb) -{ - return __skb_alloc_pages(gfp_mask, skb, 0); -} - /** * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page * @page: The page that was allocated from skb_alloc_page -- cgit From c31accd159a6477b91de61ae237dce38e3f3ee4d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 11 Nov 2014 19:45:57 +0100 Subject: net: phy: add module_phy_driver macro Add helper macro for PHY drivers which do not do anything special in module init/exit. This will allow us to eliminate a lot of boilerplate code. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- include/linux/phy.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d090cfcaa167..07794e720139 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -772,4 +772,28 @@ int __init mdio_bus_init(void); void mdio_bus_exit(void); extern struct bus_type mdio_bus_type; + +/** + * module_phy_driver() - Helper macro for registering PHY drivers + * @__phy_drivers: array of PHY drivers to register + * + * Helper macro for PHY drivers which do not do anything special in module + * init/exit. Each module may only use this macro once, and calling it + * replaces module_init() and module_exit(). 
+ */ +#define phy_module_driver(__phy_drivers, __count) \ +static int __init phy_module_init(void) \ +{ \ + return phy_drivers_register(__phy_drivers, __count); \ +} \ +module_init(phy_module_init); \ +static void __exit phy_module_exit(void) \ +{ \ + phy_drivers_unregister(__phy_drivers, __count); \ +} \ +module_exit(phy_module_exit) + +#define module_phy_driver(__phy_drivers) \ + phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) + #endif /* __PHY_H */ -- cgit From 61f2dcba9a03d4fd9342f0d6821af0a46c7098e9 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Nov 2014 19:51:56 +0100 Subject: mac802154: add interframe spacing time handling This patch adds a new interframe spacing time handling into mac802154 layer. Interframe spacing time is a time period between each transmit. This patch adds a high resolution timer into mac802154 and starts on xmit complete with corresponding interframe spacing expire time if ifs_handling is true. We make it variable because it depends if interframe spacing time is handled by transceiver or mac802154. At the timer complete function we wake the netdev queue again. This avoids new frame transmit in range of interframe spacing time. For synced driver we add no handling of interframe spacing time. This is currently a lack of support in all synced xmit drivers. I suppose it's working because the latency of workqueue which is needed to call spi_sync. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index ce0f96a55976..5a40c0418438 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -36,6 +36,9 @@ #define IEEE802154_EXTENDED_ADDR_LEN 8 +#define IEEE802154_LIFS_PERIOD 40 +#define IEEE802154_SIFS_PERIOD 12 + #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ #define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ -- cgit From 1b2f309d70daf04b6a97b3753e375654532f6207 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Nov 2014 18:11:20 +0800 Subject: rhashtable: Move mutex_is_held under PROVE_LOCKING The rhashtable function mutex_is_held is only used when PROVE_LOCKING is enabled. This patch makes the mutex_is_held field in rhashtable optional depending on PROVE_LOCKING. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index fb298e9d6d3a..96ce8ceff554 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -65,7 +65,9 @@ struct rhashtable_params { size_t new_size); bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); +#ifdef CONFIG_PROVE_LOCKING int (*mutex_is_held)(void); +#endif }; /** -- cgit From 7b4ce2353467fdab6e003be7a3129fb09b09deac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Nov 2014 18:11:22 +0800 Subject: rhashtable: Add parent argument to mutex_is_held Currently mutex_is_held can only test locks that are global since it takes no arguments. This prevents rhashtable from being used in places where locks are local, e.g., per-namespace locks.
This patch adds a parent field to mutex_is_held and rhashtable_params so that local locks can be used (and tested). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 96ce8ceff554..473e26bdb91d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -66,7 +66,8 @@ struct rhashtable_params { bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); #ifdef CONFIG_PROVE_LOCKING - int (*mutex_is_held)(void); + int (*mutex_is_held)(void *parent); + void *parent; #endif }; -- cgit From 7ae0e400cd9396c41fe596d35dcc34feaa89a04f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:32 +0200 Subject: net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs Previously, the driver queried the firmware in order to get the number of supported EQs. Under SRIOV, since this was done before the driver notified the firmware how many VFs it actually needs, the firmware had to take into account a worst case scenario and always allocated four EQs per VF, where one was used for events while the others were used for completions. Now, when the firmware supports the asymmetric allocation scheme, denoted by exposing num_sys_eqs > 0 (--> MLX4_DEV_CAP_FLAG2_SYS_EQS), we use the QUERY_FUNC command to query the firmware before enabling SRIOV. Thus we can get more EQs and MSI-X vectors per function. Moreover, when running in the new firmware/driver mode, the limitation that the number of EQs should be a power of two is lifted. Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3d9bff00f24a..1c560eb870ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,7 +189,8 @@ enum { MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 13, MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, - MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16 + MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, + MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17 }; enum { @@ -443,6 +444,7 @@ struct mlx4_caps { int num_cqs; int max_cqes; int reserved_cqs; + int num_sys_eqs; int num_eqs; int reserved_eqs; int num_comp_vectors; -- cgit From de966c5928026b100a989c8cef761d306310a184 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 13 Nov 2014 14:45:33 +0200 Subject: net/mlx4_core: Support more than 64 VFs We now allow up to 126 VFs. Note though that certain firmware versions only allow up to 80 VFs. Moreover, old HCAs only support 64 VFs. In these cases, we limit the maximum number of VFs to 64. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1c560eb870ad..cf09e65c2901 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -95,7 +95,7 @@ enum { enum { MLX4_MAX_NUM_PF = 16, - MLX4_MAX_NUM_VF = 64, + MLX4_MAX_NUM_VF = 126, MLX4_MAX_NUM_VF_P_PORT = 64, MLX4_MFUNC_MAX = 80, MLX4_MAX_EQ_NUM = 1024, @@ -190,7 +190,8 @@ enum { MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 14, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, - MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17 + MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, + MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18 }; enum { -- cgit From 6eba82248ef47fd478f940a418429e3ec95cb3db Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 13 Nov 2014 13:45:46 +0100 Subject: rhashtable: Drop gfp_flags arg in insert/remove functions Reallocation is only required for shrinking and expanding and both rely on a mutex for synchronization and callers of rhashtable_init() are in non atomic context. Therefore, no reason to continue passing allocation hints through the API. Instead, use GFP_KERNEL and add __GFP_NOWARN | __GFP_NORETRY to allow for silent fall back to vzalloc() without the OOM killer jumping in as pointed out by Eric Dumazet and Eric W. Biederman. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 473e26bdb91d..b93fd89b2e5e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -99,16 +99,16 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev, gfp_t flags); + struct rhash_head __rcu **pprev); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); -int rhashtable_expand(struct rhashtable *ht, gfp_t flags); -int rhashtable_shrink(struct rhashtable *ht, gfp_t flags); +int rhashtable_expand(struct rhashtable *ht); +int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(const struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, -- cgit From a77f9c5dcdf8480a93332792c336fa2bf9d31229 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 14 Nov 2014 11:05:06 -0800 Subject: Revert "fast_hash: avoid indirect function calls" This reverts commit e5a2c899957659cd1a9f789bc462f9c0b35f5150. Commit e5a2c899 introduced an alternative_call, arch_fast_hash2, that selects between __jhash2 and __intel_crc4_2_hash based on the X86_FEATURE_XMM4_2. 
Unfortunately, the alternative_call system does not appear to be suitable for use with C functions, as register usage is not handled properly for the called functions. The __jhash2 function in particular clobbers registers that are not preserved when called via alternative_call, resulting in a panic for direct callers of arch_fast_hash2 on older CPUs lacking sse4_2. It is possible that __intel_crc4_2_hash works merely by chance because it uses fewer registers. This commit was suggested as the source of the problem by Jesse Gross . Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- include/linux/hash.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index 6e8fb028848c..d0494c399392 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -84,4 +84,38 @@ static inline u32 hash32_ptr(const void *ptr) return (u32)val; } +struct fast_hash_ops { + u32 (*hash)(const void *data, u32 len, u32 seed); + u32 (*hash2)(const u32 *data, u32 len, u32 seed); +}; + +/** + * arch_fast_hash - Caclulates a hash over a given buffer that can have + * arbitrary size. This function will eventually use an + * architecture-optimized hashing implementation if + * available, and trades off distribution for speed. + * + * @data: buffer to hash + * @len: length of buffer in bytes + * @seed: start seed + * + * Returns 32bit hash. + */ +extern u32 arch_fast_hash(const void *data, u32 len, u32 seed); + +/** + * arch_fast_hash2 - Caclulates a hash over a given buffer that has a + * size that is of a multiple of 32bit words. This + * function will eventually use an architecture- + * optimized hashing implementation if available, + * and trades off distribution for speed. + * + * @data: buffer to hash (must be 32bit padded) + * @len: number of 32bit words + * @seed: start seed + * + * Returns 32bit hash. 
+ */ +extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed); + #endif /* _LINUX_HASH_H */ -- cgit From 960fb622f85180f36d3aff82af53e2be3db2f888 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2014 06:23:05 -0800 Subject: net: provide a per host RSS key generic infrastructure RSS (Receive Side Scaling) typically uses Toeplitz hash and a 40 or 52 bytes RSS key. Some drivers use a constant (and well known key), some drivers use a random key per port, making bonding setups hard to tune. Well known keys increase attack surface, considering that number of queues is usually a power of two. This patch provides infrastructure to help drivers doing the right thing. netdev_rss_key_fill() should be used by drivers to initialize their RSS key, even if they provide ethtool -X support to let user redefine the key later. A new /proc/sys/net/core/netdev_rss_key file can be used to get the host RSS key even for drivers not providing ethtool -x support, in case some applications want to precisely setup flows to match some RX queues. Tested: myhost:~# cat /proc/sys/net/core/netdev_rss_key 11:63:99:bb:79:fb:a5:a7:07:45:b2:20:bf:02:42:2d:08:1a:dd:19:2b:6b:23:ac:56:28:9d:70:c3:ac:e8:16:4b:b7:c1:10:53:a4:78:41:36:40:74:b6:15:ca:27:44:aa:b3:4d:72 myhost:~# ethtool -x eth0 RX flow hash indirection table for eth0 with 8 RX ring(s): 0: 0 1 2 3 4 5 6 7 RSS hash key: 11:63:99:bb:79:fb:a5:a7:07:45:b2:20:bf:02:42:2d:08:1a:dd:19:2b:6b:23:ac:56:28:9d:70:c3:ac:e8:16:4b:b7:c1:10:53:a4:78:41 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4a6f770377d3..db63cf459ba1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3422,6 +3422,12 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); + +/* RSS keys are 40 or 52 bytes long */ +#define NETDEV_RSS_KEY_LEN 52 +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +void netdev_rss_key_fill(void *buffer, size_t len); + int dev_get_nest_level(struct net_device *dev, bool (*type_check)(struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); -- cgit From b9d1ab7eb42ede51ffbb6cafffd0a521b30c12e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2014 06:23:16 -0800 Subject: mlx4: use netdev_rss_key_fill() helper Use of well known RSS key increases attack surface. Switch to a random one, using generic helper so that all ports share a common key. Also provide ethtool -x support to fetch RSS key Signed-off-by: Eric Dumazet Cc: Amir Vadai Signed-off-by: David S. 
Miller --- include/linux/mlx4/qp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 5f4e36cf0091..467ccdf94c98 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -120,13 +120,15 @@ enum { MLX4_RSS_QPC_FLAG_OFFSET = 13, }; +#define MLX4_EN_RSS_KEY_SIZE 40 + struct mlx4_rss_context { __be32 base_qpn; __be32 default_qpn; u16 reserved; u8 hash_fn; u8 flags; - __be32 rss_key[10]; + __be32 rss_key[MLX4_EN_RSS_KEY_SIZE / sizeof(__be32)]; __be32 base_qpn_udp; }; -- cgit From cb41c8dd01d74d091618f72e28f0282f064a9f0a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 17 Nov 2014 08:20:54 +0100 Subject: ieee802154: rename and move WPAN_NUM_ defines This patch moves the 802.15.4 constraints WPAN_NUM_ defines into "net/ieee802154.h" which should contain all necessary 802.15.4 related information. Also rename these defines to a common name which is IEEE802154_MAX_CHANNEL and IEEE802154_MAX_PAGE. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 5a40c0418438..6e82d888287c 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -39,6 +39,9 @@ #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 +#define IEEE802154_MAX_CHANNEL 26 +#define IEEE802154_MAX_PAGE 31 + #define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ #define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ #define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ -- cgit From 3274f52073d88b62f3c5ace82ae9d48546232e72 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:44 -0800 Subject: bpf: add 'flags' attribute to BPF_MAP_UPDATE_ELEM command the current meaning of BPF_MAP_UPDATE_ELEM syscall command is: either update existing map element or create a new one. Initially the plan was to add a new command to handle the case of 'create new element if it didn't exist', but 'flags' style looks cleaner and overall diff is much smaller (more code reused), so add 'flags' attribute to BPF_MAP_UPDATE_ELEM command with the following meaning: #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ bpf_update_elem(fd, key, value, BPF_NOEXIST) call can fail with EEXIST if element already exists. bpf_update_elem(fd, key, value, BPF_EXIST) can fail with ENOENT if element doesn't exist. Userspace will call it as: int bpf_update_elem(int fd, void *key, void *value, __u64 flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), .flags = flags; }; return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } First two bits of 'flags' are used to encode style of bpf_update_elem() command. 
Bits 2-63 are reserved for future use. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3cf91754a957..51e9242e4803 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,7 +22,7 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value); + int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); }; -- cgit From d0003ec01c667b731c139e23de3306a8b328ccf5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:49 -0800 Subject: bpf: allow eBPF programs to use maps expose bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() map accessors to eBPF programs Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 51e9242e4803..75e94eaa228b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -133,4 +133,9 @@ struct bpf_prog *bpf_prog_get(u32 ufd); /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +/* verifier prototypes for helper functions called from eBPF programs */ +extern struct bpf_func_proto bpf_map_lookup_elem_proto; +extern struct bpf_func_proto bpf_map_update_elem_proto; +extern struct bpf_func_proto bpf_map_delete_elem_proto; + #endif /* _LINUX_BPF_H */ -- cgit From 1277b4a9f531e84e26f9e0210c1801b0c0bf81ca Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 9 Nov 2014 18:50:08 +0200 Subject: mac80211: retransmit TDLS teardown packet through AP if not ACKed Since the TDLS peer station might not receive the teardown packet (e.g., when in PS), this makes sure the packet is retransmitted - this time through the AP - if the TDLS peer didn't ACK the packet. 
Signed-off-by: Liad Kaufman Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f65b5446d983..4e2bb9107878 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -19,6 +19,7 @@ #include #include #include +#include /* * DS bit usage @@ -2418,6 +2419,30 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, return !!(tim->virtual_map[index] & mask); } +/** + * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) + * @skb: the skb containing the frame, length will not be checked + * @hdr_size: the size of the ieee80211_hdr that starts at skb->data + * + * This function assumes the frame is a data frame, and that the network header + * is in the correct place. + */ +static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) +{ + if (!skb_is_nonlinear(skb) && + skb->len > (skb_network_offset(skb) + 2)) { + /* Point to where the indication of TDLS should start */ + const u8 *tdls_data = skb_network_header(skb) - 2; + + if (get_unaligned_be16(tdls_data) == ETH_P_TDLS && + tdls_data[2] == WLAN_TDLS_SNAP_RFTYPE && + tdls_data[3] == WLAN_CATEGORY_TDLS) + return tdls_data[4]; + } + + return -1; +} + /* convert time units */ #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) -- cgit From 2cedd87960a809dd9bf683f72123b7dce6736f07 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 9 Nov 2014 18:50:13 +0200 Subject: mac80211: add BSS coex IE to TDLS setup frames Add the BSS coex IE in case we support HT40 channels, as mandated by section 8.5.13 in IEEE802.11 2012. 
Signed-off-by: Arik Nemtsov Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4e2bb9107878..adac1be67387 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2037,6 +2037,9 @@ enum ieee80211_tdls_actioncode { /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 +/* BSS Coex IE information field bits */ +#define WLAN_BSS_COEX_INFORMATION_REQUEST BIT(0) + /** * enum - mesh synchronization method identifier * -- cgit From 78632a17eaa7a5abdc22aac8ca5932d6cad59984 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 9 Nov 2014 18:50:14 +0200 Subject: cfg/mac80211: define TDLS channel switch feature bit Define some related TDLS protocol constants and advertise channel switch support in the extended-capabilities IE when the feature bit is defined. Actually supporting TDLS channel-switching also requires support for some new nl80211 commands, to be introduced by future patches. 
Signed-off-by: Arik Nemtsov Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index adac1be67387..fbb02d240658 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2019,6 +2019,11 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING BIT(2) +/* TDLS capabilities in the the 4th byte of @WLAN_EID_EXT_CAPABILITY */ +#define WLAN_EXT_CAPA4_TDLS_BUFFER_STA BIT(4) +#define WLAN_EXT_CAPA4_TDLS_PEER_PSM BIT(5) +#define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH BIT(6) + /* Interworking capabilities are set in 7th bit of 4th byte of the * @WLAN_EID_EXT_CAPABILITY information element */ @@ -2030,6 +2035,7 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA5_TDLS_ENABLED BIT(5) #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) +#define WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED BIT(7) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) #define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(7) -- cgit From 53837584438f8899e061ada4663ae1d09b49b96a Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 9 Nov 2014 18:50:18 +0200 Subject: mac80211: add parsing of TDLS specific IEs These are used in TDLS channel switching code. 
Signed-off-by: Arik Nemtsov Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fbb02d240658..4f4eea8a6288 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1067,6 +1067,12 @@ struct ieee80211_pspoll { /* TDLS */ +/* Channel switch timing */ +struct ieee80211_ch_switch_timing { + __le16 switch_time; + __le16 switch_timeout; +} __packed; + /* Link-id information element */ struct ieee80211_tdls_lnkie { u8 ie_type; /* Link Identifier IE */ @@ -1108,6 +1114,15 @@ struct ieee80211_tdls_data { u8 dialog_token; u8 variable[0]; } __packed discover_req; + struct { + u8 target_channel; + u8 oper_class; + u8 variable[0]; + } __packed chan_switch_req; + struct { + __le16 status_code; + u8 variable[0]; + } __packed chan_switch_resp; } u; } __packed; -- cgit From 666547ff591cebdedc4679bf6b1b3f3383a8dea3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Apr 2014 14:03:05 -0400 Subject: separate kernel- and userland-side msghdr Kernel-side struct msghdr is (currently) using the same layout as userland one, but it's not a one-to-one copy - even without considering 32bit compat issues, we have msg_iov, msg_name and msg_control copied to kernel[1]. It's fairly localized, so we get away with a few functions where that knowledge is needed (and we could shrink that set even more). Pretty much everything deals with the kernel-side variant and the few places that want userland one just use a bunch of force-casts to paper over the differences. The thing is, kernel-side definition of struct msghdr is *not* exposed in include/uapi - libc doesn't see it, etc. So we can add struct user_msghdr, with proper annotations and let the few places that ever deal with those beasts use it for userland pointers. 
Saner typechecking aside, that will allow to change the layout of kernel-side msghdr - e.g. replace msg_iov/msg_iovlen there with struct iov_iter, getting rid of the need to modify the iovec as we copy data to/from it, etc. We could introduce kernel_msghdr instead, but that would create much more noise - the absolute majority of the instances would need to have the type switched to kernel_msghdr and definition of struct msghdr in include/linux/socket.h is not going to be seen by userland anyway. This commit just introduces user_msghdr and switches the few places that are dealing with userland-side msghdr to it. [1] actually, it's even trickier than that - we copy msg_control for sendmsg, but keep the userland address on recvmsg. Signed-off-by: Al Viro --- include/linux/socket.h | 16 +++++++++++++--- include/linux/syscalls.h | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index bb9b83640070..51bd6668b80e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -53,10 +53,20 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ }; + +struct user_msghdr { + void __user *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iovec __user *msg_iov; /* scatter/gather array */ + __kernel_size_t msg_iovlen; /* # elements in msg_iov */ + void __user *msg_control; /* ancillary data */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ +}; /* For recvmmsg/sendmmsg */ struct mmsghdr { - struct msghdr msg_hdr; + struct user_msghdr msg_hdr; unsigned int msg_len; }; @@ -319,8 +329,8 @@ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec; /* The __sys_...msg variants allow MSG_CMSG_COMPAT */ -extern long 
__sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); -extern long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); +extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bda9b81357cc..c9afdc7a7f84 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -25,7 +25,7 @@ struct linux_dirent64; struct list_head; struct mmap_arg_struct; struct msgbuf; -struct msghdr; +struct user_msghdr; struct mmsghdr; struct msqid_ds; struct new_utsname; @@ -601,13 +601,13 @@ asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, struct sockaddr __user *, int); -asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags); asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags, struct timespec __user *timeout); -- cgit From 08adb7dabd4874cc5666b4490653b26534702ce0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Nov 2014 20:23:13 -0500 
Subject: fold verify_iovec() into copy_msghdr_from_user() ... and do the same on the compat side of things. Signed-off-by: Al Viro --- include/linux/socket.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 51bd6668b80e..de5222832be4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,6 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, extern unsigned long iov_pages(const struct iovec *iov, int offset, unsigned long nr_segs); -extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode); extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -- cgit From 860f6e9eb780443381a76e3766a9698afbc5e2e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 Nov 2014 12:59:14 +0100 Subject: net: phy: add static data field to struct phy_driver Add static driver-data field to struct phy_driver, which can be used to store structured device-type information. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 07794e720139..22af8f8f5802 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -433,6 +433,7 @@ struct phy_device { * by this PHY * flags: A bitfield defining certain other features this PHY * supports (like interrupts) + * driver_data: static driver data * * The drivers must implement config_aneg and read_status. All * other functions are optional. 
Note that none of these @@ -448,6 +449,7 @@ struct phy_driver { unsigned int phy_id_mask; u32 features; u32 flags; + const void *driver_data; /* * Called to issue a PHY software reset -- cgit From 63f44b2bfccdd98193bbd602747f780c0fae0f02 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 Nov 2014 12:59:18 +0100 Subject: net: phy: micrel: add generic clock-mode-select support Add generic RMII-Reference-Clock-Select support. Several Micrel PHYs have an RMII-Reference-Clock-Select bit to select 25 MHz or 50 MHz clock mode. Recently, support for configuring this through device tree for KSZ8021 and KSZ8031 was added. Generalise this support so that it can be configured for other PHY types as well. Note that some PHY revisions (of the same type) have this bit inverted. This should be either configurable through a new device-tree property, or preferably, determined based on PHY ID if possible. Also note that this removes support for setting 25 MHz mode from board files which was also added by the above mentioned commit 45f56cb82e45 ("net/phy: micrel: Add clock support for KSZ8021/KSZ8031"). Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 53d33dee70e1..2e5b194b9b19 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -37,7 +37,6 @@ /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 -#define MICREL_PHY_25MHZ_CLK 0x00000002 #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit From b960a0ac6939ef4962c5abbf33e80d1382b45fc1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:04:56 +0100 Subject: vlan: make __vlan_hwaccel_put_tag return void Always returns the same skb it gets, so change to void. Signed-off-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/linux/if_vlan.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d69f0577a319..1b5dbc2711f9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -347,13 +347,11 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, * * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest */ -static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, - __be16 vlan_proto, - u16 vlan_tci) +static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { skb->vlan_proto = vlan_proto; skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; - return skb; } /** @@ -368,7 +366,8 @@ static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (vlan_hw_offload_capable(skb->dev->features, vlan_proto)) { - return __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + return skb; } else { return __vlan_put_tag(skb, vlan_proto, vlan_tci); } -- cgit From b4bef1b57544b18899eb15569e3bafd8d2eeeff6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:04:57 +0100 Subject: vlan: kill vlan_put_tag helper Since both tx and rx paths work with skb->vlan_tci, there's no need for this function anymore. Switch users directly to __vlan_hwaccel_put_tag. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 1b5dbc2711f9..75b70a5e4a6d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -354,25 +354,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; } -/** - * vlan_put_tag - inserts VLAN tag according to device features - * @skb: skbuff to tag - * @vlan_tci: VLAN TCI to insert - * - * Assumes skb->dev is the target that will xmit this frame. - * Returns a VLAN tagged skb. - */ -static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) -{ - if (vlan_hw_offload_capable(skb->dev->features, vlan_proto)) { - __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); - return skb; - } else { - return __vlan_put_tag(skb, vlan_proto, vlan_tci); - } -} - /** * __vlan_get_tag - get the VLAN ID that is part of the payload * @skb: skbuff to query -- cgit From 62749e2cb3c4a7da3eaa5c01a7e787aebeff8536 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:04:58 +0100 Subject: vlan: rename __vlan_put_tag to vlan_insert_tag_set_proto Name fits better. Plus there's going to be introduced __vlan_insert_tag later on. Signed-off-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 75b70a5e4a6d..46e4a15b9b55 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -320,8 +320,9 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, } /** - * __vlan_put_tag - regular VLAN tag inserting + * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload @@ -330,8 +331,9 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, + __be16 vlan_proto, + u16 vlan_tci) { skb = vlan_insert_tag(skb, vlan_proto, vlan_tci); if (skb) -- cgit From 5968250c868ceee680aa77395b24e6ddcae17d36 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:04:59 +0100 Subject: vlan: introduce *vlan_hwaccel_push_inside helpers Use them to push skb->vlan_tci into the payload and avoid code duplication. Signed-off-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 46e4a15b9b55..291e6706876e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -341,6 +341,40 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, return skb; } +/* + * __vlan_hwaccel_push_inside - pushes vlan tag to the payload + * @skb: skbuff to tag + * + * Pushes the VLAN tag from @skb->vlan_tci inside to the payload. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) +{ + skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (likely(skb)) + skb->vlan_tci = 0; + return skb; +} +/* + * vlan_hwaccel_push_inside - pushes vlan tag to the payload + * @skb: skbuff to tag + * + * Checks if tag is present in @skb->vlan_tci and if it is, it pushes the + * VLAN tag from @skb->vlan_tci inside to the payload. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb)) + skb = __vlan_hwaccel_push_inside(skb); + return skb; +} + /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting * @skb: skbuff to tag -- cgit From 15255a43e6c917813800702e100267046e240cc0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:05:00 +0100 Subject: vlan: introduce __vlan_insert_tag helper which does not free skb There's a need for a helper which inserts vlan tag but does not free the skb in case of an error. Suggested-by: Pravin Shelar Signed-off-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/linux/if_vlan.h | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 291e6706876e..515a35e2a48a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -282,28 +282,24 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, } /** - * vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. - * - * Following the skb_unshare() example, in case of error, the calling function - * doesn't have to worry about freeing the original skb. + * Returns error if skb_cow_head fails. * * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline int __vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { struct vlan_ethhdr *veth; - if (skb_cow_head(skb, VLAN_HLEN) < 0) { - dev_kfree_skb_any(skb); - return NULL; - } + if (skb_cow_head(skb, VLAN_HLEN) < 0) + return -ENOMEM; + veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); /* Move the mac addresses to the beginning of the new header. */ @@ -316,6 +312,33 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); + return 0; +} + +/** + * vlan_insert_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + int err; + + err = __vlan_insert_tag(skb, vlan_proto, vlan_tci); + if (err) { + dev_kfree_skb_any(skb); + return NULL; + } return skb; } -- cgit From e21951212f03b8d805795d8f71206853b2ab344d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:05:01 +0100 Subject: net: move make_writable helper into common code note that skb_make_writable already exists in net/netfilter/core.c but does something slightly different. Suggested-by: Eric Dumazet Signed-off-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 73c370e615de..e045516891a9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2678,6 +2678,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); +int skb_ensure_writable(struct sk_buff *skb, int write_len); struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); -- cgit From 93515d53b133d66f01aec7b231fa3e40e3d2fd9a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:05:02 +0100 Subject: net: move vlan pop/push functions into common code So it can be used from out of openvswitch code. Did couple of cosmetic changes on the way, namely variable naming and adding support for 8021AD proto. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e045516891a9..78c299f40bac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2679,6 +2679,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_vlan_pop(struct sk_buff *skb); +int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); -- cgit From 479163f4608214d18bc3266ab6e4b578897a3052 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Nov 2014 08:13:57 +0000 Subject: mlx5: don't duplicate kvfree() Signed-off-by: Al Viro Acked-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 246310dc8bef..b1bf41556b32 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -633,14 +633,6 @@ static inline void *mlx5_vzalloc(unsigned long size) return rtn; } -static inline void mlx5_vfree(const void *addr) -{ - if (addr && is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} - static inline u32 mlx5_base_mkey(const u32 key) { return key & 0xffffff00u; -- cgit From 227158db160449b6513d2e31894a135104b90e90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Apr 2014 18:47:38 -0400 Subject: new helper: skb_copy_and_csum_datagram_msg() Signed-off-by: Al Viro --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 78c299f40bac..cbe4b2078b30 100644 --- a/include/linux/skbuff.h 
+++ b/include/linux/skbuff.h @@ -2651,6 +2651,11 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov); +static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, + struct msghdr *msg) +{ + return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov); +} int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, const struct iovec *from, int from_offset, int len); -- cgit From 6ce8e9ce5989ae13f493062975304700be86d20e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Apr 2014 21:25:44 -0400 Subject: new helper: memcpy_from_msg() Signed-off-by: Al Viro --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cbe4b2078b30..97dc5f8123b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2687,6 +2687,11 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) +{ + return memcpy_fromiovec(data, msg->msg_iov, len); +} + struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); -- cgit From 7eab8d9e8a722ca07bc785f73e21c3d3418defa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Apr 2014 21:51:23 -0400 Subject: new helper: memcpy_to_msg() Signed-off-by: Al Viro --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97dc5f8123b3..d048347a010a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2692,6 +2692,11 @@ static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) 
return memcpy_fromiovec(data, msg->msg_iov, len); } +static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) +{ + return memcpy_toiovec(msg->msg_iov, data, len); +} + struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); -- cgit From 3a654f975bf99165016fe257a3d2b4e6716e4931 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jun 2014 14:15:22 -0400 Subject: new helpers: skb_copy_datagram_from_iter() and zerocopy_sg_from_iter() Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d048347a010a..a01cd9ad0b51 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2659,10 +2659,13 @@ static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, const struct iovec *from, int from_offset, int len); +int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, int offset, size_t count); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -- cgit From 195e952d03a797aa953f62ffe24ec58693e17ed8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Nov 2014 00:56:48 -0500 Subject: kill zerocopy_sg_from_iovec() no users left Signed-off-by: Al Viro --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 
a01cd9ad0b51..178cdbde82f0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2661,8 +2661,6 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, int len); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); -int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, - int offset, size_t count); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); -- cgit From 8feb2fb2bb986c533e18037d3c45a5f779421992 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Nov 2014 01:10:59 -0500 Subject: switch AF_PACKET and AF_UNIX to skb_copy_datagram_from_iter() ... and kill skb_copy_datagram_iovec() Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 178cdbde82f0..7691ad5b4771 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2656,9 +2656,6 @@ static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, { return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov); } -int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - const struct iovec *from, int from_offset, - int len); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, -- cgit From 2ad7bf3638411cb547f2823df08166c13ab04269 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sun, 23 Nov 2014 23:07:46 -0800 Subject: ipvlan: Initial check-in of the IPVLAN driver. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is very similar to the macvlan driver except that it uses L3 on the frame to determine the logical interface while functioning as packet dispatcher. 
It inherits L2 of the master device hence the packets on wire will have the same L2 for all the packets originating from all virtual devices off of the same master device. This driver was developed keeping the namespace use-case in mind. Hence most of the examples given here take that as the base setup where main-device belongs to the default-ns and virtual devices are assigned to the additional namespaces. The device operates in two different modes and the difference in these two modes is primarily on the TX side. (a) L2 mode : In this mode, the device behaves as an L2 device. TX processing up to L2 happens on the stack of the virtual device associated with (namespace). Packets are switched after that into the main device (default-ns) and queued for xmit. RX processing is simple and all multicast, broadcast (if applicable), and unicast belonging to the address(es) are delivered to the virtual devices. (b) L3 mode : In this mode, the device behaves like an L3 device. TX processing up to L3 happens on the stack of the virtual device associated with (namespace). Packets are switched to the main-device (default-ns) for the L2 processing. Hence the routing table of the default-ns will be used in this mode. RX processing is somewhat similar to the L2 mode except that in this mode only Unicast packets are delivered to the virtual device while main-dev will handle all other packets. The devices can be added using the "ip" command from the iproute2 package - ip link add link <master-dev> <virtual-dev> type ipvlan mode [ l2 | l3 ] Signed-off-by: Mahesh Bandewar Cc: Eric Dumazet Cc: Maciej Żenczykowski Cc: Laurent Chavey Cc: Tim Hockin Cc: Brandon Philips Cc: Pavel Emelianov Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5cd508787572..2cb772495f7a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1230,6 +1230,8 @@ enum netdev_priv_flags { IFF_LIVE_ADDR_CHANGE = 1<<20, IFF_MACVLAN = 1<<21, IFF_XMIT_DST_RELEASE_PERM = 1<<22, + IFF_IPVLAN_MASTER = 1<<23, + IFF_IPVLAN_SLAVE = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1255,6 +1257,8 @@ enum netdev_priv_flags { #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM +#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER +#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE /** * struct net_device - The DEVICE structure. -- cgit From 40af86a40cdcabd48ab9636fe13d6763a7d74bc9 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Thu, 13 Nov 2014 00:30:23 +0100 Subject: NFC: st21nfca: Remove gpio_irq field in static and dts configuration - phy->gpio_irq is never done out of the request resources. - irq_of_parse_and_map is already done in the i2c core so client->irq is already set when entering in st21nfca_hci_i2c_of_request_resources - In case of static platform configuration client->irq can be set directly - It simplifies the code a bit. 
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfca.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h index 1730312398ff..5087fff96d86 100644 --- a/include/linux/platform_data/st21nfca.h +++ b/include/linux/platform_data/st21nfca.h @@ -24,7 +24,6 @@ #define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" struct st21nfca_nfc_platform_data { - unsigned int gpio_irq; unsigned int gpio_ena; unsigned int irq_polarity; }; -- cgit From a80d0cb6f6addc5a1f3852466fe8d37ca4fe1350 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Thu, 13 Nov 2014 00:30:26 +0100 Subject: NFC: st21nfcb: Remove gpio_irq field in static and dts configuration - phy->gpio_irq is never done out of the request resources. - irq_of_parse_and_map is already done in the i2c core so client->irq is already set when entering in st21nfcb_hci_i2c_of_request_resources - In case of static platform configuration client->irq can be set directly. - It simplifies the code a bit. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfcb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index 2d11f1f5efab..c3b432f5b63e 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -24,7 +24,6 @@ #define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" struct st21nfcb_nfc_platform_data { - unsigned int gpio_irq; unsigned int gpio_reset; unsigned int irq_polarity; }; -- cgit From f6f6424ba773da6221ecaaa70973eb4dacfa03b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 28 Nov 2014 14:34:15 +0100 Subject: net: make vid as a parameter for ndo_fdb_add/ndo_fdb_del Do the work of parsing NDA_VLAN directly in rtnetlink code, pass simple u16 vid to drivers from there. 
Signed-off-by: Jiri Pirko Acked-by: Andy Gospodarek Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++--- include/linux/rtnetlink.h | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2cb772495f7a..589929cf4700 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -951,11 +951,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 flags) + * const unsigned char *addr, u16 vid, u16 flags) * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr) + * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1128,11 +1128,13 @@ struct net_device_ops { struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, + u16 vid, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr); + const unsigned char *addr, + u16 vid); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6cacbce1a06c..063f0f581fe0 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -94,11 +94,13 @@ extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 flags); + u16 vid, + u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr); + const unsigned char 
*addr, + u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode); -- cgit From 02637fce3e0103ba086b9c33b6d529e69460e4b6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 28 Nov 2014 14:34:16 +0100 Subject: net: rename netdev_phys_port_id to more generic name So this can be reused for identification of other "items" as well. Signed-off-by: Jiri Pirko Reviewed-by: Thomas Graf Acked-by: John Fastabend Acked-by: Andy Gospodarek Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 589929cf4700..4bd41d72559d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -754,13 +754,13 @@ struct netdev_fcoe_hbainfo { }; #endif -#define MAX_PHYS_PORT_ID_LEN 32 +#define MAX_PHYS_ITEM_ID_LEN 32 -/* This structure holds a unique identifier to identify the - * physical port used by a netdevice. +/* This structure holds a unique identifier to identify some + * physical item (port for example) used by a netdevice. */ -struct netdev_phys_port_id { - unsigned char id[MAX_PHYS_PORT_ID_LEN]; +struct netdev_phys_item_id { + unsigned char id[MAX_PHYS_ITEM_ID_LEN]; unsigned char id_len; }; @@ -976,7 +976,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. * * int (*ndo_get_phys_port_id)(struct net_device *dev, - * struct netdev_phys_port_id *ppid); + * struct netdev_phys_item_id *ppid); * Called to get ID of physical port of this device. If driver does * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. 
@@ -1152,7 +1152,7 @@ struct net_device_ops { int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, - struct netdev_phys_port_id *ppid); + struct netdev_phys_item_id *ppid); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2870,7 +2870,7 @@ void dev_set_group(struct net_device *, int); int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_port_id *ppid); + struct netdev_phys_item_id *ppid); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit From 007f790c8276271de26416f90d55561bcc96588a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 28 Nov 2014 14:34:17 +0100 Subject: net: introduce generic switch devices support The goal of this is to provide a possibility to support various switch chips. Drivers should implement relevant ndos to do so. Now there is only one ndo defined: - for getting physical switch id is in place. Note that user can use random port netdevice to access the switch. Signed-off-by: Jiri Pirko Reviewed-by: Thomas Graf Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4bd41d72559d..3603f31e78f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1018,6 +1018,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * performing GSO on a packet. The device returns true if it is * able to GSO the packet, false otherwise. If the return value is * false the stack will do software GSO. 
+ * + * int (*ndo_switch_parent_id_get)(struct net_device *dev, + * struct netdev_phys_item_id *psid); + * Called to get an ID of the switch chip this port is part of. + * If driver implements this, it indicates that it represents a port + * of a switch chip. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1171,6 +1177,10 @@ struct net_device_ops { int (*ndo_get_lock_subclass)(struct net_device *dev); bool (*ndo_gso_check) (struct sk_buff *skb, struct net_device *dev); +#ifdef CONFIG_NET_SWITCHDEV + int (*ndo_switch_parent_id_get)(struct net_device *dev, + struct netdev_phys_item_id *psid); +#endif }; /** -- cgit From 38dcf357aed299186ecb090cc2f5290cc17d637d Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:20 +0100 Subject: bridge: call netdev_sw_port_stp_update when bridge port STP status changes To notify switch driver of change in STP state of bridge port, add new .ndo op and provide switchdev wrapper func to call ndo op. Use it in bridge code then. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: Andy Gospodarek Acked-by: Thomas Graf Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3603f31e78f3..29c92ee9ed56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1024,6 +1024,9 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Called to get an ID of the switch chip this port is part of. * If driver implements this, it indicates that it represents a port * of a switch chip. + * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); + * Called to notify switch device port of bridge port STP + * state change. 
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1180,6 +1183,8 @@ struct net_device_ops { #ifdef CONFIG_NET_SWITCHDEV int (*ndo_switch_parent_id_get)(struct net_device *dev, struct netdev_phys_item_id *psid); + int (*ndo_switch_port_stp_update)(struct net_device *dev, + u8 state); #endif }; -- cgit From cf6b8e1eedffd9ef9a22c0c9453d752b07daf89a Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:21 +0100 Subject: bridge: add API to notify bridge driver of learned FDB on offloaded device When the swdev device learns a new mac/vlan on a port, it sends some async notification to the driver and the driver installs an FDB in the device. To give a holistic system view, the learned mac/vlan should be reflected in the bridge's FDB table, so the user, using normal iproute2 cmds, can view what is currently learned by the device. This API on the bridge driver gives a way for the swdev driver to install an FDB entry in the bridge FDB table. (And remove one). This is equivalent to the device running these cmds: bridge fdb [add|del] <mac> dev <dev> vid <vlan id> master This patch needs some extra eyeballs for review, in particular around the locking and contexts. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- include/linux/if_bridge.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 808dcb8cc04f..fa2eca625129 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -37,6 +37,24 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; +#if IS_ENABLED(CONFIG_BRIDGE) +int br_fdb_external_learn_add(struct net_device *dev, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_device *dev, + const unsigned char *addr, u16 vid); +#else +static inline int br_fdb_external_learn_add(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return 0; +} +static inline int br_fdb_external_learn_del(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return 0; +} +#endif + #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); -- cgit From 065c212a9e25172069f368b36228379521dadb65 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:22 +0100 Subject: bridge: move private brport flags to if_bridge.h so port drivers can use flags Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Acked-by: Andy Gospodarek Acked-by: Florian Fainelli --- include/linux/if_bridge.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index fa2eca625129..2c81a8efd24d 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -15,6 +15,7 @@ #include #include +#include struct br_ip { union { @@ -32,6 +33,17 @@ struct br_ip_list { struct br_ip addr; }; +#define BR_HAIRPIN_MODE BIT(0) +#define BR_BPDU_GUARD BIT(1) +#define 
BR_ROOT_BLOCK BIT(2) +#define BR_MULTICAST_FAST_LEAVE BIT(3) +#define BR_ADMIN_COST BIT(4) +#define BR_LEARNING BIT(5) +#define BR_FLOOD BIT(6) +#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) +#define BR_PROMISC BIT(7) +#define BR_PROXYARP BIT(8) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); -- cgit From efacacdaf7cb5a0592ed772e3731636b2742e34a Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:23 +0100 Subject: bridge: add new brport flag LEARNING_SYNC This policy flag controls syncing of learned FDB entries to bridge's FDB. If on, FDB entries learned on bridge port device will be synced. If off, device may still learn new FDB entries but they will not be synced with bridge's FDB. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Roopa Prabhu Acked-by: Jamal Hadi Salim Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 2c81a8efd24d..0a8ce762a47f 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -43,6 +43,7 @@ struct br_ip_list { #define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) +#define BR_LEARNING_SYNC BIT(9) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -- cgit From 2c3c031c8f8930861815fa1685d7c5e8ccec047c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:25 +0100 Subject: bridge: add brport flags to dflt bridge_getlink To allow brport device to return current brport flags set on port. Add returned flags to nested IFLA_PROTINFO netlink msg built in dflt getlink. With this change, netlink msg returned for bridge_getlink contains the port's offloaded flag settings (the port's SELF settings). 
Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Andy Gospodarek Acked-by: Thomas Graf Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 063f0f581fe0..3b0419072f88 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -103,5 +103,6 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u16 mode); + struct net_device *dev, u16 mode, + u32 flags, u32 mask); #endif /* __LINUX_RTNETLINK_H */ -- cgit From 89aa075832b0da4402acebd698d0411dcc82d03e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:35 -0800 Subject: net: sock: allow eBPF programs to be attached to sockets introduce new setsockopt() command: setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd)) where prog_fd was received from syscall bpf(BPF_PROG_LOAD, attr, ...) and attr->prog_type == BPF_PROG_TYPE_SOCKET_FILTER setsockopt() calls bpf_prog_get() which increments refcnt of the program, so it doesn't get unloaded while socket is using the program. The same eBPF program can be attached to multiple sockets. User task exit automatically closes socket which calls sk_filter_uncharge() which decrements refcnt of eBPF program Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 4 ++++ include/linux/filter.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75e94eaa228b..bbfceb756452 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -128,7 +128,11 @@ struct bpf_prog_aux { struct work_struct work; }; +#ifdef CONFIG_BPF_SYSCALL void bpf_prog_put(struct bpf_prog *prog); +#else +static inline void bpf_prog_put(struct bpf_prog *prog) {} +#endif struct bpf_prog *bpf_prog_get(u32 ufd); /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); diff --git a/include/linux/filter.h b/include/linux/filter.h index ca95abd2bed1..caac2087a4d5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -381,6 +381,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); -- cgit From bac78aabcfece0c493b2ad824c68fbdc20448cbc Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 3 Dec 2014 17:54:13 +0000 Subject: can: dev: Consolidate and unify state change handling The handling of can error states is different between platforms. This is an attempt to correct that problem. I've moved this handling into a generic function for changing the error state. This ensures that error state changes are handled the same way everywhere (where this function is used). This new mechanism also adds reverse state transitioning in error frames, i.e. the user will be notified through the socket interface when the state goes down. 
Signed-off-by: Andri Yngvason Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index b37ea95bc348..c05ff0f9f9a5 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -127,6 +127,9 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +void can_change_state(struct net_device *dev, struct can_frame *cf, + enum can_state tx_state, enum can_state rx_state); + void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -- cgit From c3582a2c4d0baf1fa3955c8b3d3d61308df474c7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 1 Dec 2014 13:28:39 -0800 Subject: hyperv: Add support for vNIC hot removal This patch adds proper handling of the vNIC hot removal event, which includes a rescind-channel-offer message from the host side that triggers vNIC close and removal. In this case, the notices to the host during close and removal are not necessary because the channel is rescinded. This patch blocks these unnecessary messages, and lets the vNIC removal process complete normally. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S.
Miller --- include/linux/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 08cfaff8a072..476c685ca6f9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -650,6 +650,8 @@ struct vmbus_channel { u8 monitor_grp; u8 monitor_bit; + bool rescind; /* got rescind msg */ + u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ -- cgit From b0ba512e25d729a43858ad1f6cb8b94dbb95dbeb Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Mon, 1 Dec 2014 16:18:08 -0800 Subject: net: bcmgenet: enable driver to work without a device tree Modify bcmgenet driver so that it can be used on Broadcom 7xxx MIPS-based STB platforms without a device tree. Signed-off-by: Petri Gynther Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/bcmgenet.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/platform_data/bcmgenet.h (limited to 'include/linux') diff --git a/include/linux/platform_data/bcmgenet.h b/include/linux/platform_data/bcmgenet.h new file mode 100644 index 000000000000..26af54321958 --- /dev/null +++ b/include/linux/platform_data/bcmgenet.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_PLATFORM_DATA_BCMGENET_H__ +#define __LINUX_PLATFORM_DATA_BCMGENET_H__ + +#include +#include +#include + +struct bcmgenet_platform_data { + bool mdio_enabled; + phy_interface_t phy_interface; + int phy_address; + int phy_speed; + int phy_duplex; + u8 mac_address[ETH_ALEN]; + int genet_version; +}; + +#endif -- cgit From 97ede29e80eead50d8bd533cf163401b88c027be Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 2 Dec 2014 15:00:30 +0800 Subject: tipc: convert name table read-write lock to RCU Convert tipc name table read-write lock to RCU. After this change, a new spin lock is used to protect name table on write side while RCU is applied on read side. 
Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Tested-by: Erik Hugne Signed-off-by: David S. Miller --- include/linux/rculist.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 372ad5e0dcb8..aa79b3c24f66 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -542,6 +542,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ typeof(*(pos)), member)) +/** + * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from_rcu(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif -- cgit From 0c7aac854f52d3302b88fd599216a810f490ab1f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 2 Dec 2014 12:26:14 +0200 Subject: net/mlx5_core: Remove unused dev cap enum fields These enumerations are not used so remove them. Signed-off-by: Eli Cohen Signed-off-by: David S. 
Miller --- include/linux/mlx5/device.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1d67fd32e71c..ea4f1c46f761 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -219,23 +219,15 @@ enum { }; enum { - MLX5_DEV_CAP_FLAG_RC = 1LL << 0, - MLX5_DEV_CAP_FLAG_UC = 1LL << 1, - MLX5_DEV_CAP_FLAG_UD = 1LL << 2, MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, - MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, - MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, - MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, - MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, - MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; -- cgit From 9c0c112422a2a6b06fcddcaf21957676490cebba Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 3 Dec 2014 08:17:33 -0800 Subject: net: Add functions for handling padding frame and adding to length This patch adds two new helper functions skb_put_padto and eth_skb_pad. These functions deviate from the standard skb_pad or skb_padto in that they will also update the length and tail pointers so that they reflect the padding added to the frame. The eth_skb_pad helper is meant to be used with Ethernet devices to update either Rx or Tx frames so that they report the correct size. The skb_put_padto helper is meant to be used primarily in the transmit path for network devices that need frames to be padded up to some minimum size and don't wish to simply update the length somewhere external to the frame. 
The motivation behind this is that there are a number of implementations throughout the network device drivers that are all doing the same thing, but each a little bit differently and as a result several implementations contain bugs such as updating the length without updating the tail offset and other similar issues. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 12 ++++++++++++ include/linux/skbuff.h | 24 +++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 733980fce8e3..41c891d05f04 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -392,4 +392,16 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) #endif } +/** + * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * @skb: Buffer to pad + * + * An Ethernet frame should have a minimum size of 60 bytes. This function + * takes short frames and pads them with zeros up to the 60 byte limit. + */ +static inline int eth_skb_pad(struct sk_buff *skb) +{ + return skb_put_padto(skb, ETH_ZLEN); +} + #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7691ad5b4771..d1e2575000b9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2461,7 +2461,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. 
*/ - static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; @@ -2470,6 +2469,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) return skb_pad(skb, len - size); } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + + if (unlikely(size < len)) { + len -= size; + if (skb_pad(skb, len)) + return -ENOMEM; + __skb_put(skb, len); + } + return 0; +} + static inline int skb_add_data(struct sk_buff *skb, char __user *from, int copy) { -- cgit From 892311f66f2411b813ca631009356891a0c2b0a1 Mon Sep 17 00:00:00 2001 From: Eyal Perry Date: Tue, 2 Dec 2014 18:12:10 +0200 Subject: ethtool: Support for configurable RSS hash function This patch extends the set/get_rxfh ethtool-options for getting or setting the RSS hash function. It modifies the drivers' implementations of set/get_rxfh accordingly. This change also delegates the responsibility of checking whether a modification to a certain RX flow hash parameter is supported to the driver implementation of set_rxfh. User-kernel API is done through the new hfunc bitmask field in the ethtool_rxfh struct. A bit set in the hfunc field corresponds to an index in the new string-set ETH_SS_RSS_HASH_FUNCS. Got approval from most of the relevant driver maintainers that their driver is using Toeplitz, and for the few that didn't answer, we also assumed it is Toeplitz.
Cc: Tom Lendacky Cc: Ariel Elior Cc: Prashant Sreedharan Cc: Michael Chan Cc: Hariprasad S Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Cc: Jeff Kirsher Cc: Jesse Brandeburg Cc: Bruce Allan Cc: Carolyn Wyborny Cc: Don Skidmore Cc: Greg Rose Cc: Matthew Vick Cc: John Ronciak Cc: Mitch Williams Cc: Amir Vadai Cc: Solarflare linux maintainers Cc: Shradha Shah Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: Ben Hutchings Signed-off-by: Eyal Perry Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c1a2d60dfb82..653dc9c4ebac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -59,6 +59,26 @@ enum ethtool_phys_id_state { ETHTOOL_ID_OFF }; +enum { + ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */ + ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */ + + /* + * Add your fresh new hash function bits above and remember to update + * rss_hash_func_strings[] in ethtool.c + */ + ETH_RSS_HASH_FUNCS_COUNT +}; + +#define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) +#define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) + +#define ETH_RSS_HASH_TOP __ETH_RSS_HASH(TOP) +#define ETH_RSS_HASH_XOR __ETH_RSS_HASH(XOR) + +#define ETH_RSS_HASH_UNKNOWN 0 +#define ETH_RSS_HASH_NO_CHANGE 0 + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -158,17 +178,14 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. - * @get_rxfh: Get the contents of the RX flow hash indirection table and hash - * key. 
- * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. - * Returns a negative error code or zero. - * @set_rxfh: Set the contents of the RX flow hash indirection table and/or - * hash key. In case only the indirection table or hash key is to be - * changed, the other argument will be %NULL. - * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table, hash key + * and/or hash function. * Returns a negative error code or zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table, hash + * key, and/or hash function. Arguments which are set to %NULL or zero + * will remain unchanged. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. 
@@ -241,9 +258,10 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key, + u8 *hfunc); int (*set_rxfh)(struct net_device *, const u32 *indir, - const u8 *key); + const u8 *key, const u8 hfunc); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); -- cgit From 395eea6ccf2b253f81b4718ffbcae67d36fe2e69 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 3 Dec 2014 13:46:24 -0800 Subject: rtnetlink: delay RTM_DELLINK notification until after ndo_uninit() The commit 56bfa7ee7c ("unregister_netdevice : move RTM_DELLINK to until after ndo_uninit") tried to do this earlier but while doing so it created a problem. Unfortunately the delayed rtmsg_ifinfo() also delayed call to fill_info(). So this translated into asking driver to remove private state and then query its private state. This could have catastrophic consequences. This change breaks the rtmsg_ifinfo() into two parts - one takes the precise snapshot of the device by calling fill_info() before calling the ndo_uninit() and the second part sends the notification using collected snapshot. It was brought to notice when last link is deleted from an ipvlan device when it has freed the port and the subsequent .fill_info() call is trying to get the info from the port. 
kernel: [ 255.139429] ------------[ cut here ]------------ kernel: [ 255.139439] WARNING: CPU: 12 PID: 11173 at net/core/rtnetlink.c:2238 rtmsg_ifinfo+0x100/0x110() kernel: [ 255.139493] Modules linked in: ipvlan bonding w1_therm ds2482 wire cdc_acm ehci_pci ehci_hcd i2c_dev i2c_i801 i2c_core msr cpuid bnx2x ptp pps_core mdio libcrc32c kernel: [ 255.139513] CPU: 12 PID: 11173 Comm: ip Not tainted 3.18.0-smp-DEV #167 kernel: [ 255.139514] Hardware name: Intel RML,PCH/Ibis_QC_18, BIOS 1.0.10 05/15/2012 kernel: [ 255.139515] 0000000000000009 ffff880851b6b828 ffffffff815d87f4 00000000000000e0 kernel: [ 255.139516] 0000000000000000 ffff880851b6b868 ffffffff8109c29c 0000000000000000 kernel: [ 255.139518] 00000000ffffffa6 00000000000000d0 ffffffff81aaf580 0000000000000011 kernel: [ 255.139520] Call Trace: kernel: [ 255.139527] [] dump_stack+0x46/0x58 kernel: [ 255.139531] [] warn_slowpath_common+0x8c/0xc0 kernel: [ 255.139540] [] warn_slowpath_null+0x1a/0x20 kernel: [ 255.139544] [] rtmsg_ifinfo+0x100/0x110 kernel: [ 255.139547] [] rollback_registered_many+0x1d5/0x2d0 kernel: [ 255.139549] [] unregister_netdevice_many+0x1f/0xb0 kernel: [ 255.139551] [] rtnl_dellink+0xbb/0x110 kernel: [ 255.139553] [] rtnetlink_rcv_msg+0xa0/0x240 kernel: [ 255.139557] [] ? rhashtable_lookup_compare+0x43/0x80 kernel: [ 255.139558] [] ? __rtnl_unlock+0x20/0x20 kernel: [ 255.139562] [] netlink_rcv_skb+0xb1/0xc0 kernel: [ 255.139563] [] rtnetlink_rcv+0x25/0x40 kernel: [ 255.139565] [] netlink_unicast+0x178/0x230 kernel: [ 255.139567] [] netlink_sendmsg+0x30f/0x420 kernel: [ 255.139571] [] sock_sendmsg+0x9c/0xd0 kernel: [ 255.139575] [] ? rw_copy_check_uvector+0x6f/0x130 kernel: [ 255.139577] [] ? copy_msghdr_from_user+0x139/0x1b0 kernel: [ 255.139578] [] ___sys_sendmsg+0x304/0x310 kernel: [ 255.139581] [] ? handle_mm_fault+0xca3/0xde0 kernel: [ 255.139585] [] ? destroy_inode+0x3c/0x70 kernel: [ 255.139589] [] ? __do_page_fault+0x20c/0x500 kernel: [ 255.139597] [] ? 
dput+0xb6/0x190 kernel: [ 255.139606] [] ? mntput+0x26/0x40 kernel: [ 255.139611] [] ? __fput+0x174/0x1e0 kernel: [ 255.139613] [] __sys_sendmsg+0x49/0x90 kernel: [ 255.139615] [] SyS_sendmsg+0x12/0x20 kernel: [ 255.139617] [] system_call_fastpath+0x12/0x17 kernel: [ 255.139619] ---[ end trace 5e6703e87d984f6b ]--- Signed-off-by: Mahesh Bandewar Reported-by: Toshiaki Makita Cc: Eric Dumazet Cc: Roopa Prabhu Cc: David S. Miller Acked-by: Eric Dumazet Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3b0419072f88..5db76a32fcab 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -17,6 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, + unsigned change, gfp_t flags); +void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, + gfp_t flags); + /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); -- cgit From 6ffe75eb53564953e75c051e1c28676e1e56f385 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Dec 2014 17:04:39 -0800 Subject: net: avoid two atomic operations in fast clones Commit ce1a4ea3f125 ("net: avoid one atomic operation in skb_clone()") took the wrong way to save one atomic operation. It is actually possible to avoid two atomic operations, if we do not change skb->fclone values, and only rely on clone_ref content to signal if the clone is available or not. skb_clone() can simply use the fast clone if clone_ref is 1. kfree_skbmem() can avoid the atomic_dec_and_test() if clone_ref is 1. 
Note that because we usually free the clone before the original skb, this particular attempt is only done for the original skb to have better branch prediction. SKB_FCLONE_FREE is removed. Signed-off-by: Eric Dumazet Cc: Chris Mason Cc: Sabrina Dubroca Cc: Vijay Subramanian Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d1e2575000b9..e9281b5b7f59 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -344,7 +344,6 @@ enum { SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ - SKB_FCLONE_FREE, /* this companion fclone skb is available */ }; enum { @@ -818,7 +817,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE && + atomic_read(&fclones->fclone_ref) > 1 && fclones->skb2.sk == sk; } -- cgit From dbfc4fb7d578d4f224faa6b60deb40804dfdc1b1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 6 Dec 2014 19:19:42 +0100 Subject: dst: no need to take reference on DST_NOCACHE dsts Since commit f8864972126899 ("ipv4: fix dst race in sk_dst_get()") DST_NOCACHE dst_entries get freed by RCU. So there is no need to get a reference on them when we are in rcu protected sections. Cc: Eric Dumazet Cc: Julian Anastasov Signed-off-by: Hannes Frederic Sowa Reviewed-by: Julian Anastasov Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9281b5b7f59..ef64cec42804 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -717,9 +717,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); - /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer @@ -732,24 +729,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - __skb_dst_set_noref(skb, dst, false); -} - -/** - * skb_dst_set_noref_force - sets skb dst, without taking reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst. - * No reference is taken and no dst_release will be called. While for - * cached dsts deferred reclaim is a basic feature, for entries that are - * not cached it is caller's job to guarantee that last dst_release for - * provided dst happens when nobody uses it, eg. after a RCU grace period. - */ -static inline void skb_dst_set_noref_force(struct sk_buff *skb, - struct dst_entry *dst) -{ - __skb_dst_set_noref(skb, dst, true); + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** -- cgit From 2f33e7d59cb09d9b20d6648086b314f0ba61fb03 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 6 Dec 2014 15:53:04 -0800 Subject: netdevice: Add a function to check macvlan port Similar to a check for macvlan device, netif_is_macvlan(), add another function to check if a device is used as macvlan port. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29c92ee9ed56..1f49aac258f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3641,6 +3641,11 @@ static inline bool netif_is_macvlan(struct net_device *dev) return dev->priv_flags & IFF_MACVLAN; } +static inline bool netif_is_macvlan_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_MACVLAN_PORT; +} + static inline bool netif_is_bond_master(struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; -- cgit From 5933fea7aa7237ba52d67c069c39ad5c3ab7a036 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sat, 6 Dec 2014 15:53:33 -0800 Subject: ipvlan: move the device check function into netdevice.h Move the port check [ipvlan_dev_master()] and device check [ipvlan_dev_slave()] functions to netdevice.h and rename them netif_is_ipvlan_port() and netif_is_ipvlan() resp. to be consistent with macvlan api naming. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1f49aac258f9..c31f74d76ebd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3646,6 +3646,16 @@ static inline bool netif_is_macvlan_port(struct net_device *dev) return dev->priv_flags & IFF_MACVLAN_PORT; } +static inline bool netif_is_ipvlan(struct net_device *dev) +{ + return dev->priv_flags & IFF_IPVLAN_SLAVE; +} + +static inline bool netif_is_ipvlan_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_IPVLAN_MASTER; +} + static inline bool netif_is_bond_master(struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; -- cgit From f4362a2c9524678f0459cf410403f8595e5cfce5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 13:26:06 -0500 Subject: switch tcp_sock->ucopy from iovec (ucopy.iov) to msghdr (ucopy.msg) Signed-off-by: Al Viro --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f566b8567892..5d9cc9cd2855 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -162,7 +162,7 @@ struct tcp_sock { struct { struct sk_buff_head prequeue; struct task_struct *task; - struct iovec *iov; + struct msghdr *msg; int memory; int len; } ucopy; -- cgit From d838df2e5dcbb6ed4d82854869e9a30f9aeef6da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 19:32:50 -0500 Subject: vmci: propagate msghdr all way down to __qp_memcpy_from_queue() ... 
and switch it to memcpy_to_msg() Signed-off-by: Al Viro --- include/linux/vmw_vmci_api.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 023430e265fe..5691f752ce8f 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -24,6 +24,7 @@ #define VMCI_KERNEL_API_VERSION_2 2 #define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_2 +struct msghdr; typedef void (vmci_device_shutdown_fn) (void *device_registration, void *user_data); @@ -75,8 +76,8 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, void *iov, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, - void *iov, size_t iov_size, int mode); -ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, void *iov, size_t iov_size, + struct msghdr *msg, size_t iov_size, int mode); +ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); #endif /* !__VMW_VMCI_API_H__ */ -- cgit From c0371da6047abd261bc483c744dbc7d81a116172 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 10:42:55 -0500 Subject: put iov_iter into msghdr Note that the code _using_ ->msg_iter at that point will be very unhappy with anything other than unshifted iovec-backed iov_iter. We still need to convert users to proper primitives. 
Signed-off-by: Al Viro --- include/linux/skbuff.h | 16 ++++++++++------ include/linux/socket.h | 3 +-- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ef64cec42804..52cf1bdac0d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2646,22 +2646,24 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to, int size); +int skb_copy_datagram_iter(const struct sk_buff *from, int offset, + struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, struct msghdr *msg, int size) { - return skb_copy_datagram_iovec(from, offset, msg->msg_iov, size); + /* XXX: stripping const */ + return skb_copy_datagram_iovec(from, offset, (struct iovec *)msg->msg_iter.iov, size); } int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov); static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg) { - return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov); + /* XXX: stripping const */ + return skb_copy_and_csum_datagram_iovec(skb, hlen, (struct iovec *)msg->msg_iter.iov); } int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); -int skb_copy_datagram_iter(const struct sk_buff *from, int offset, - struct iov_iter *to, int size); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); @@ -2689,12 +2691,14 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - return memcpy_fromiovec(data, msg->msg_iov, len); + /* XXX: stripping const */ + 
return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) { - return memcpy_toiovec(msg->msg_iov, data, len); + /* XXX: stripping const */ + return memcpy_toiovec((struct iovec *)msg->msg_iter.iov, data, len); } struct skb_checksum_ops { diff --git a/include/linux/socket.h b/include/linux/socket.h index de5222832be4..048d6d6eed6d 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -47,8 +47,7 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ - struct iovec *msg_iov; /* scatter/gather array */ - __kernel_size_t msg_iovlen; /* # elements in msg_iov */ + struct iov_iter msg_iter; /* data */ void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ -- cgit From e5a4b0bb803b39a36478451eae53a880d2663d5b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 18:17:55 -0500 Subject: switch memcpy_to_msg() and skb_copy{,_and_csum}_datagram_msg() to primitives ... making both non-draining. That means that tcp_recvmsg() becomes non-draining. And _that_ would break iscsit_do_rx_data() unless we a) make sure tcp_recvmsg() is uniformly non-draining (it is) b) make sure it copes with arbitrary (including shifted) iov_iter (it does, all it uses is iov_iter primitives) c) make iscsit_do_rx_data() initialize ->msg_iter only once. Fortunately, (c) is doable with minimal work and we are rid of one of the two places where kernel send/recvmsg users would be unhappy with non-draining behaviour. Actually, that makes all but one of ->recvmsg() instances iov_iter-clean. The exception is skcipher_recvmsg() and it also isn't hard to convert to primitives (iov_iter_get_pages() is needed there). That'll wait a bit - there's some interplay with ->sendmsg() path for that one. 
Signed-off-by: Al Viro --- include/linux/skbuff.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 52cf1bdac0d8..4902f2df90c8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2651,17 +2651,10 @@ int skb_copy_datagram_iter(const struct sk_buff *from, int offset, static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, struct msghdr *msg, int size) { - /* XXX: stripping const */ - return skb_copy_datagram_iovec(from, offset, (struct iovec *)msg->msg_iter.iov, size); -} -int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, - struct iovec *iov); -static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, - struct msghdr *msg) -{ - /* XXX: stripping const */ - return skb_copy_and_csum_datagram_iovec(skb, hlen, (struct iovec *)msg->msg_iter.iov); + return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); } +int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, + struct msghdr *msg); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); @@ -2697,8 +2690,7 @@ static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) { - /* XXX: stripping const */ - return memcpy_toiovec((struct iovec *)msg->msg_iter.iov, data, len); + return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } struct skb_checksum_ops { -- cgit From d3a9632f09153bc46a8077844e05e179f1c10c3f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 18:29:54 -0500 Subject: skb_copy_datagram_iovec() can die no callers other than itself. 
Signed-off-by: Al Viro --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4902f2df90c8..ab0bc43c82a4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2644,8 +2644,6 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); -int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, - struct iovec *to, int size); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, -- cgit From 218321e7a0838c2be974539f0a5341b398d4432b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 19:45:05 -0500 Subject: bury memcpy_toiovec() no users left Signed-off-by: Al Viro --- include/linux/uio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index bd8569a14c4a..a41e252396c0 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -131,7 +131,6 @@ size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); -int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, -- cgit From 605ad7f184b60cfaacbc038aa6c55ee68dee3c89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Dec 2014 12:22:18 -0800 Subject: tcp: refine TSO autosizing Commit 95bd09eb2750 ("tcp: TSO packets automatic sizing") tried to control TSO size, but did this at the wrong place (sendmsg() 
time) At sendmsg() time, we might have a pessimistic view of flow rate, and we end up building very small skbs (with 2 MSS per skb). This is bad because : - It sends small TSO packets even in Slow Start where rate quickly increases. - It tends to make socket write queue very big, increasing tcp_ack() processing time, but also increasing memory needs, not necessarily accounted for, as fast clones overhead is currently ignored. - Lower GRO efficiency and more ACK packets. Servers with a lot of short-lived connections suffer from this. Let's instead fill skbs as much as possible (64KB of payload), but split them at xmit time, when we have a precise idea of the flow rate. skb split is actually quite efficient. Patch looks bigger than necessary, because TCP Small Queue decision now has to take place after the eventual split. As Neal suggested, introduce a new tcp_tso_autosize() helper, so that tcp_tso_should_defer() can be synchronized on same goal. Rename tp->xmit_size_goal_segs to tp->gso_segs, as this variable contains number of mss that we can put in GSO packet, and is not related to the autosizing goal anymore. Tested: 40 ms rtt link nstat >/dev/null netperf -H remote -l -2000000 -- -s 1000000 nstat | egrep "IpInReceives|IpOutRequests|TcpOutSegs|IpExtOutOctets" Before patch : Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/s 87380 2000000 2000000 0.36 44.22 IpInReceives 600 0.0 IpOutRequests 599 0.0 TcpOutSegs 1397 0.0 IpExtOutOctets 2033249 0.0 After patch : Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 2000000 2000000 0.36 44.27 IpInReceives 221 0.0 IpOutRequests 232 0.0 TcpOutSegs 1397 0.0 IpExtOutOctets 2013953 0.0 Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f566b8567892..3fa0a9669a3a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -130,7 +130,7 @@ struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; u16 tcp_header_len; /* Bytes of tcp header to send */ - u16 xmit_size_goal_segs; /* Goal for segmenting output packets */ + u16 gso_segs; /* Max number of segs per GSO packet */ /* * Header prediction flags -- cgit From ffde7328a36d16e626bae8468571858d71cd010b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Dec 2014 19:40:42 -0800 Subject: net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag This patch splits the netdev_alloc_frag function up so that it can be used on one of two page frag pools instead of being fixed on the netdev_alloc_cache. By doing this we can add a NAPI specific function __napi_alloc_frag that accesses a pool that is only used from softirq context. The advantage to this is that we do not need to call local_irq_save/restore which can be a significant savings. I also took the opportunity to refactor the core bits that were placed in __alloc_page_frag. First I updated the allocation to do either a 32K allocation or an order 0 page. This is based on the changes in commit d9b2938aa where it was found that latencies could be reduced in case of failures. Then I also rewrote the logic to work from the end of the page to the start. By doing this the size value doesn't have to be used unless we have run out of space for page fragments. Finally I cleaned up the atomic bits so that we just do an atomic_sub_and_test and if that returns true then we set the page->_count via an atomic_set. This way we can remove the extra conditional for the atomic_read since it would have led to an atomic_inc in the case of success anyway. 
Signed-off-by: Alexander Duyck Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ab0bc43c82a4..736cc99f3f6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2164,6 +2164,8 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +void *napi_alloc_frag(unsigned int fragsz); + /** * __dev_alloc_pages - allocate page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx -- cgit From fd11a83dd3630ec6a60f8a702446532c5c7e1991 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Dec 2014 19:40:49 -0800 Subject: net: Pull out core bits of __netdev_alloc_skb and add __napi_alloc_skb This change pulls the core functionality out of __netdev_alloc_skb and places them in a new function named __alloc_rx_skb. The reason for doing this is to make these bits accessible to a new function __napi_alloc_skb. In addition __alloc_rx_skb now has a new flags value that is used to determine which page frag pool to allocate from. If the SKB_ALLOC_NAPI flag is set then the NAPI pool is used. The advantage of this is that we do not have to use local_irq_save/restore when accessing the NAPI pool from NAPI context. In my test setup I saw at least 11ns of savings using the napi_alloc_skb function versus the netdev_alloc_skb function, most of this being due to the fact that we didn't have to call local_irq_save/restore. The main use case for napi_alloc_skb would be for things such as copybreak or page fragment based receive paths where an skb is allocated after the data has been received instead of before. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 736cc99f3f6c..85ab7d72b54c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -151,6 +151,7 @@ struct net_device; struct scatterlist; struct pipe_inode_info; struct iov_iter; +struct napi_struct; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -673,6 +674,7 @@ struct sk_buff { #define SKB_ALLOC_FCLONE 0x01 #define SKB_ALLOC_RX 0x02 +#define SKB_ALLOC_NAPI 0x04 /* Returns true if the skb was allocated from PFMEMALLOC reserves */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) @@ -2165,6 +2167,13 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, } void *napi_alloc_frag(unsigned int fragsz); +struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, + unsigned int length, gfp_t gfp_mask); +static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, + unsigned int length) +{ + return __napi_alloc_skb(napi, length, GFP_ATOMIC); +} /** * __dev_alloc_pages - allocate page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx -- cgit From 0cb6c969ed9de43687abdfc63714b6fe4385d2fc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Dec 2014 16:33:12 +0100 Subject: net, lib: kill arch_fast_hash library bits As there are now no remaining users of arch_fast_hash(), let's kill it entirely. This basically reverts commit 71ae8aac3e19 ("lib: introduce arch optimized hash library") and follow-up work, that is, e.g., commit 237217546d44 ("lib: hash: follow-up fixups for arch hash"), commit e3fec2f74f7f ("lib: Add missing arch generic-y entries for asm-generic/hash.h") and last but not least commit 6a02652df511 ("perf tools: Fix include for non x86 architectures"). Cc: Francesco Fusco Cc: Thomas Graf Cc: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/hash.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index d0494c399392..1afde47e1528 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -15,7 +15,6 @@ */ #include -#include #include /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ @@ -84,38 +83,4 @@ static inline u32 hash32_ptr(const void *ptr) return (u32)val; } -struct fast_hash_ops { - u32 (*hash)(const void *data, u32 len, u32 seed); - u32 (*hash2)(const u32 *data, u32 len, u32 seed); -}; - -/** - * arch_fast_hash - Caclulates a hash over a given buffer that can have - * arbitrary size. This function will eventually use an - * architecture-optimized hashing implementation if - * available, and trades off distribution for speed. - * - * @data: buffer to hash - * @len: length of buffer in bytes - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash(const void *data, u32 len, u32 seed); - -/** - * arch_fast_hash2 - Caclulates a hash over a given buffer that has a - * size that is of a multiple of 32bit words. This - * function will eventually use an architecture- - * optimized hashing implementation if available, - * and trades off distribution for speed. - * - * @data: buffer to hash (must be 32bit padded) - * @len: number of 32bit words - * @seed: start seed - * - * Returns 32bit hash. - */ -extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed); - #endif /* _LINUX_HASH_H */ -- cgit From f95b414edb18de59940dcebbefb49cf25c6d505c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 11 Dec 2014 11:22:04 +0800 Subject: net: introduce helper macro for_each_cmsghdr Introduce helper macro for_each_cmsghdr as a wrapper of the enumerating cmsghdr from msghdr, just cleanup. Signed-off-by: Gu Zheng Signed-off-by: David S. 
Miller --- include/linux/socket.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 048d6d6eed6d..6e49a14365dc 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -103,6 +103,10 @@ struct cmsghdr { (cmsg)->cmsg_len <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) +#define for_each_cmsghdr(cmsg, msg) \ + for (cmsg = CMSG_FIRSTHDR(msg); \ + cmsg; \ + cmsg = CMSG_NXTHDR(msg, cmsg)) /* * Get the next cmsg header -- cgit From 3dca0f42c7baaa4e01699629da13d6556f001ebe Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 11 Dec 2014 10:57:53 +0200 Subject: net/mlx4_core: Use tasklet for user-space CQ completion events Previously, we've fired all our completion callbacks straight from our ISR. Some of those callbacks were lightweight (for example, mlx4_en's and IPoIB napi callbacks), but some of them did more work (for example, the user-space RDMA stack uverbs' completion handler). Besides that, doing more than the minimal work in ISR is generally considered wrong, it could even lead to a hard lockup of the system. Since when a lot of completion events are generated by the hardware, the loop over those events could be so long, that we'll get into a hard lockup by the system watchdog. In order to avoid that, add a new way of invoking completion events callbacks. In the interrupt itself, we add the CQs which receive completion event to a per-EQ list and schedule a tasklet. In the tasklet context we loop over all the CQs in the list and invoke the user callback. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx4/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index cf09e65c2901..3951b5368d7e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -621,6 +621,11 @@ struct mlx4_cq { atomic_t refcount; struct completion free; + struct { + struct list_head list; + void (*comp)(struct mlx4_cq *); + void *priv; + } tasklet_ctx; }; struct mlx4_qp { -- cgit From ddae0349fdb78bcc5e7219061847012aa1a29069 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 11 Dec 2014 10:57:54 +0200 Subject: net/mlx4: Change QP allocation scheme When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset. The current Ethernet driver code reserves a Tx QP range with 256b alignment. This is wrong because if there are more than 64 Tx QPs in use, QPNs >= base + 65 will have bits 6/7 set. This problem is not specific for the Ethernet driver, any entity that tries to reserve more than 64 BF-enabled QPs should fail. Also, using ranges is not necessary here and is wasteful. The new mechanism introduced here will support reservation for "Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs (when hypervisors support WC in VMs). The flow we use is: 1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation, and request "BF enabled QPs" if BF is supported for the function 2. In the ALLOC_RES FW command, change param1 to: a. param1[23:0] - number of QPs b. param1[31-24] - flags controlling QPs reservation Bit 31 refers to Eth blueflame supported QPs. Those QPs must have bits 6 and 7 unset in order to be used in Ethernet. Bits 24-30 of the flags are currently reserved. When a function tries to allocate a QP, it states the required attributes for this QP. Those attributes are considered "best-effort". 
If an attribute, such as Ethernet BF enabled QP, is a must-have attribute, the function has to check that attribute is supported before trying to do the allocation. In a lower layer of the code, mlx4_qp_reserve_range masks out the bits which are unsupported. If SRIOV is used, the PF validates those attributes and masks out unsupported attributes as well. In order to notify VFs which attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's mailbox is filled by the PF, which notifies which QP allocation attributes it supports. Signed-off-by: Eugenia Emantayev Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3951b5368d7e..272aa258c036 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -194,6 +194,22 @@ enum { MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18 }; +enum { + MLX4_QUERY_FUNC_FLAGS_BF_RES_QP = 1LL << 0 +}; + +/* bit enums for an 8-bit flags field indicating special use + * QPs which require special handling in qp_reserve_range. + * Currently, this only includes QPs used by the ETH interface, + * where we expect to use blueflame. These QPs must not have + * bits 6 and 7 set in their qp number. + * + * This enum may use only bits 0..7. 
+ */ +enum { + MLX4_RESERVE_ETH_BF_QP = 1 << 7, +}; + enum { MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1, @@ -501,6 +517,7 @@ struct mlx4_caps { u64 phys_port_id[MLX4_MAX_PORTS + 1]; int tunnel_offload_mode; u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1]; + u8 alloc_res_qp_mask; }; struct mlx4_buf_list { @@ -950,8 +967,8 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); - -int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, + int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, -- cgit From d57febe1a47801ef8a55dbf10672850523dfaa60 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 11 Dec 2014 10:57:57 +0200 Subject: net/mlx4: Add A0 hybrid steering A0 hybrid steering is a form of high performance flow steering. By using this mode, mlx4 cards use a fast limited table based steering, in order to enable fast steering of unicast packets to a QP. In order to implement A0 hybrid steering we allocate resources from different zones: (1) General range (2) Special MAC-assigned QPs [RSS, Raw-Ethernet] each has its own region. When we create a rss QP or a raw ethernet (A0 steerable and BF ready) QP, we try hard to allocate the QP from range (2). Otherwise, we try hard not to allocate from this range. However, when the system is pushed to its limits and one needs every resource, the allocator uses every region it can. Meaning, when we run out of raw-eth qps, the allocator allocates from the general range (and the special-A0 area is no longer active). If we run out of RSS qps, the mechanism tries to allocate from the raw-eth QP zone. 
If that is also exhausted, the allocator will allocate from the general range (and the A0 region is no longer active). Note that if a raw-eth qp is allocated from the general range, it attempts to allocate the range such that bits 6 and 7 (blueflame bits) in the QP number are not set. When the feature is used in SRIOV, the VF has to notify the PF what kind of QP attributes it needs. In order to do that, along with the "Eth QP blueflame" bit, we reserve a new "A0 steerable QP". According to the combination of these bits, the PF tries to allocate a suitable QP. In order to maintain backward compatibility (with older PFs), the PF notifies which QP attributes it supports via QUERY_FUNC_CAP command. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 272aa258c036..39890cddc5fa 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -195,7 +195,8 @@ enum { }; enum { - MLX4_QUERY_FUNC_FLAGS_BF_RES_QP = 1LL << 0 + MLX4_QUERY_FUNC_FLAGS_BF_RES_QP = 1LL << 0, + MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; /* bit enums for an 8-bit flags field indicating special use @@ -207,6 +208,7 @@ enum { * This enum may use only bits 0..7. 
*/ enum { + MLX4_RESERVE_A0_QP = 1 << 6, MLX4_RESERVE_ETH_BF_QP = 1 << 7, }; @@ -349,6 +351,8 @@ enum { enum mlx4_qp_region { MLX4_QP_REGION_FW = 0, + MLX4_QP_REGION_RSS_RAW_ETH, + MLX4_QP_REGION_BOTTOM = MLX4_QP_REGION_RSS_RAW_ETH, MLX4_QP_REGION_ETH_ADDR, MLX4_QP_REGION_FC_ADDR, MLX4_QP_REGION_FC_EXCH, @@ -891,7 +895,9 @@ static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev) && + qpn >= dev->phys_caps.base_sqpn) || + (qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) -- cgit From 7d077cd34eabb2ffd05abe0f2cad01da1ef11712 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 11 Dec 2014 10:58:00 +0200 Subject: net/mlx4: Add support for A0 steering Add the required firmware commands for A0 steering and a way to enable that. The firmware support focuses on INIT_HCA, QUERY_HCA, QUERY_PORT, QUERY_DEV_CAP and QUERY_FUNC_CAP commands. Those commands are used to configure and query the device. The different A0 DMFS (steering) modes are: Static - optimized performance, but flow steering rules are limited. This mode should be chosen explicitly by the user in order to be used. Dynamic - this mode should be explicitly chosen by the user. In this mode, the FW works in optimized steering mode as long as it can and afterwards automatically drops to classic (full) DMFS. Disable - this mode should be explicitly chosen by the user. The user instructs the system not to use optimized steering, even if the FW supports Dynamic A0 DMFS (and thus will be able to use optimized steering in Default A0 DMFS mode). Default - this mode is implicitly chosen. In this mode, if the FW supports Dynamic A0 DMFS, it'll work in this mode. Otherwise, it'll work in Disable A0 DMFS mode.
Under SRIOV configuration, when the A0 steering mode is enabled, older guest VF drivers who aren't using the RX QP allocation flag (MLX4_RESERVE_A0_QP) will get a QP from the general range and fail when attempting to register a steering rule. To avoid that, the PF context behaviour is changed once on A0 static mode, to require support for the allocation flag in VF drivers too. In order to enable A0 steering, we use log_num_mgm_entry_size param. If the value of the parameter is not positive, we treat the absolute value of log_num_mgm_entry_size as a bit field. Setting bit 2 of this bit field enables static A0 steering. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 39890cddc5fa..25c791e295fd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -117,6 +117,14 @@ enum { MLX4_STEERING_MODE_DEVICE_MANAGED }; +enum { + MLX4_STEERING_DMFS_A0_DEFAULT, + MLX4_STEERING_DMFS_A0_DYNAMIC, + MLX4_STEERING_DMFS_A0_STATIC, + MLX4_STEERING_DMFS_A0_DISABLE, + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED +}; + static inline const char *mlx4_steering_mode_str(int steering_mode) { switch (steering_mode) { @@ -191,7 +199,8 @@ enum { MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 15, MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, - MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18 + MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, + MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19 }; enum { @@ -225,7 +234,8 @@ enum { enum { MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0, - MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1 + MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1, + MLX4_FUNC_CAP_DMFS_A0_STATIC = 1L << 2 }; @@ -482,6 +492,7 @@ struct mlx4_caps { int reserved_mcgs; int num_qp_per_mgm; int steering_mode; + int dmfs_high_steer_mode; int 
fs_log_max_ucast_qp_range_size; int num_pds; int reserved_pds; @@ -522,6 +533,8 @@ struct mlx4_caps { int tunnel_offload_mode; u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1]; u8 alloc_res_qp_mask; + u32 dmfs_high_rate_qpn_base; + u32 dmfs_high_rate_qpn_range; }; struct mlx4_buf_list { -- cgit