From 7b6f08771bf6045c32a2a1e18201c1aeeb78d72a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 7 Sep 2022 10:34:30 +0300 Subject: wifi: ieee80211: Support validating ML station profile length Add a function to validate EHT Multi-Link per station profile length. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 79690938d9a2..bdf668f9dace 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4685,6 +4685,46 @@ struct ieee80211_mle_per_sta_profile { u8 variable[]; } __packed; +/** + * ieee80211_mle_sta_prof_size_ok - validate multi-link element sta profile size + * @data: pointer to the sub element data + * @len: length of the containing sub element + */ +static inline bool ieee80211_mle_sta_prof_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; + u16 control; + u8 fixed = sizeof(*prof); + u8 info_len = 1; + + if (len < fixed) + return false; + + control = le16_to_cpu(prof->control); + + if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) + info_len += 6; + if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) + info_len += 8; + if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT) + info_len += 1; + + if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && + control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) { + if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) + info_len += 2; + else + info_len += 1; + } + + return prof->sta_info_len >= info_len && + fixed + prof->sta_info_len <= len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ -- cgit From 1403b109c9a5244dc6ab79154f04eecc209ef3d2 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 7 Sep 2022 17:23:09 +0300 Subject: wifi: cfg80211/mac80211: Fix ML element common size calculation The common size is part of the length in the data so don't add it again.
Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bdf668f9dace..442b13333da8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4573,18 +4573,17 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - common += sizeof(struct ieee80211_mle_basic_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PREQ: - common += sizeof(struct ieee80211_mle_preq_common_info); + case IEEE80211_ML_CONTROL_TYPE_TDLS: + /* + * The length is the first octet pointed by mle->variable so no + * need to add anything + */ break; case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; return common; - case IEEE80211_ML_CONTROL_TYPE_TDLS: - common += sizeof(struct ieee80211_mle_tdls_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) common += ETH_ALEN; -- cgit From fb99c7d4d6d0fb4fe5a953e0c5f6c37a5b796b98 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 11 Sep 2022 16:55:12 +0300 Subject: wifi: cfg80211/mac80211: Fix ML element common size validation The Multi-Link element can be fragmented, thus its size can exceed 254. Thus, modify ieee80211_mle_size_ok() to use 'size_t len' instead of 'u8 len'. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 442b13333da8..b935a85b2f44 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4601,7 +4601,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) * @data: pointer to the element data * @len: length of the containing element */ -static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) +static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u8 fixed = sizeof(*mle); -- cgit From 45ebac4f059b92906e7e86dd1a780739f883857c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 6 Sep 2022 11:48:56 +0300 Subject: wifi: mac80211: Parse station profile from association response When processing an association response frame for a Multi-Link connection, extract the per station profile for each additional link, and use it for parsing the link elements. As the Multi-Link element might be fragmented, add support for reassembling a fragmented element. To simplify memory management logic, extend 'struct ieee802_11_elems' to hold a scratch buffer, which is used for the defragmentation. Once an element is reconstructed in the scratch area, point the corresponding element pointer to it. Currently only defragmentation of Multi-Link element and the contained per-STA profile subelement is supported. 
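A minimal usage sketch (not part of the patches above; the walker function is made up for illustration) of how a consumer might combine these validators once any fragmented element has been reassembled; note for_each_mle_subelement() already performs the ieee80211_mle_size_ok() check before iterating:

	static void walk_mle_sta_profiles(const u8 *mle_data, size_t mle_len)
	{
		const struct element *sub;

		for_each_mle_subelement(sub, mle_data, mle_len) {
			if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE)
				continue;

			/* reject truncated profiles before reading them */
			if (!ieee80211_mle_sta_prof_size_ok(sub->data,
							    sub->datalen))
				continue;

			/* the fixed fields and sta_info_len octets are now
			 * known to be within bounds
			 */
		}
	}
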
Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b935a85b2f44..f51e939f28f2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4666,6 +4666,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) enum ieee80211_mle_subelems { IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, + IEEE80211_MLE_SUBELEM_FRAGMENT = 254, }; #define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f -- cgit From 1177aaa7fe9373c762cd5bf5f5de8517bac989d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 17 Sep 2022 03:14:53 +0200 Subject: wifi: fix multi-link element subelement iteration The subelements obviously start after the common data, including the common multi-link element structure definition itself. This bug was possibly just hidden by the higher bits of the control being set to 0, so the iteration just found one bogus element and most of the code could continue anyway. Fixes: 0f48b8b88aa9 ("wifi: ieee80211: add definitions for multi-link element") Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f51e939f28f2..6252f02f38b7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4593,7 +4593,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) return 0; } - return common + mle->variable[0]; + return sizeof(*mle) + common + mle->variable[0]; } /** -- cgit From a6a6163b9a934df5965792d60af1f53aa51fdd39 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 7 Oct 2022 10:53:03 +0200 Subject: mac802154: Introduce filtering levels The 802.15.4 specification details several filtering levels that the PHY and the MAC can operate in. The amount of filtering will vary if they are in promiscuous mode or in scanning mode. Otherwise they are expected to do some very basic checks, such as enforcing the frame validity. Either the PHY is able to do so, and the MAC has nothing to do, or the PHY has a lower filtering level than expected and the MAC should take over. For now we just define these levels in an enumeration. As a second step, we will add a per-PHY parameter showing the expected filtering level as well as a per-device current filtering level, and will initialize all these fields. As a third step, we will use them to apply more filtering by software when the PHY is limited. Indeed, if the drivers know they cannot reach the requested level of filtering, they will overwrite the "current filtering" parameter so that it reflects what they do. Then, in the core, the expected filtering level will be used to decide whether some additional software processing is needed or not.
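As an illustration of where the enumeration is headed, a sketch of the software-filtering decision described above; the helper name and the idea of comparing the two levels directly are assumptions here, since the expected/current fields only land in later patches of this series:

	/* hypothetical helper: filter in software whenever the PHY does
	 * less filtering than the MAC expects
	 */
	static bool mac802154_needs_sw_filtering(enum ieee802154_filtering_level expected,
						 enum ieee802154_filtering_level actual)
	{
		return actual < expected;
	}
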
Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20221007085310.503366-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/linux/ieee802154.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index f1f9412b6ac6..0303eb84d596 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -276,6 +276,30 @@ enum { IEEE802154_SYSTEM_ERROR = 0xff, }; +/** + * enum ieee802154_filtering_level - Filtering levels applicable to a PHY + * + * @IEEE802154_FILTERING_NONE: No filtering at all, what is received is + * forwarded to the softMAC + * @IEEE802154_FILTERING_1_FCS: First filtering level, frames with an invalid + * FCS should be dropped + * @IEEE802154_FILTERING_2_PROMISCUOUS: Second filtering level, promiscuous + * mode as described in the spec, identical in terms of filtering to the + * level one on PHY side, but at the MAC level the frame should be + * forwarded to the upper layer directly + * @IEEE802154_FILTERING_3_SCAN: Third filtering level, scan related, where + * only beacons must be processed, all remaining traffic gets dropped + * @IEEE802154_FILTERING_4_FRAME_FIELDS: Fourth filtering level actually + * enforcing the validity of the content of the frame with various checks + */ +enum ieee802154_filtering_level { + IEEE802154_FILTERING_NONE, + IEEE802154_FILTERING_1_FCS, + IEEE802154_FILTERING_2_PROMISCUOUS, + IEEE802154_FILTERING_3_SCAN, + IEEE802154_FILTERING_4_FRAME_FIELDS, +}; + /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 -- cgit From e6c86c513f440bec5f1046539c7e3c6c653842da Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Oct 2022 19:39:43 +0800 Subject: rcu-tasks: Provide rcu_trace_implies_rcu_gp() As an accident of implementation, an RCU Tasks Trace grace period also acts as an RCU grace period. However, this could change at any time. This commit therefore creates an rcu_trace_implies_rcu_gp() that currently returns true to codify this accident. Code relying on this accident must call this function to verify that this accident is still happening. Reported-by: Hou Tao Signed-off-by: Paul E. McKenney Cc: Alexei Starovoitov Cc: Martin KaFai Lau Link: https://lore.kernel.org/r/20221014113946.965131-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/rcupdate.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..8822f06e4b40 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -240,6 +240,18 @@ static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ +/** + * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? + * + * As an accident of implementation, an RCU Tasks Trace grace period also + * acts as an RCU grace period. However, this could change at any time. + * Code relying on this accident must call this function to verify that + * this accident is still happening. + * + * You have been warned! 
+ */ +static inline bool rcu_trace_implies_rcu_gp(void) { return true; } + /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * -- cgit From a2fd08448f2b7591fc7ec8dec2e5e025a43f0cee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Oct 2022 14:18:26 +0200 Subject: net: remove smc911x driver This driver was used on Arm and SH machines until 2009, when the last platforms moved to the smsc911x driver for the same hardware. Time to retire this version. Link: https://lore.kernel.org/netdev/1232010482-3744-1-git-send-email-steve.glendinning@smsc.com/ Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20221017121900.3520108-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/smc911x.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/smc911x.h (limited to 'include/linux') diff --git a/include/linux/smc911x.h b/include/linux/smc911x.h deleted file mode 100644 index 8cace8189e74..000000000000 --- a/include/linux/smc911x.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SMC911X_H__ -#define __SMC911X_H__ - -#define SMC911X_USE_16BIT (1 << 0) -#define SMC911X_USE_32BIT (1 << 1) - -struct smc911x_platdata { - unsigned long flags; - unsigned long irq_flags; /* IRQF_... */ - int irq_polarity; -}; - -#endif /* __SMC911X_H__ */ -- cgit From f392a1846489720fc2e063d1210633b6cf4ec5a4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 17 Oct 2022 16:22:35 -0400 Subject: net: phylink: provide phylink_validate_mask_caps() helper Provide a helper that restricts the link modes according to the phylink capabilities. Signed-off-by: Russell King (Oracle) [rebased on net-next/master and added documentation] Signed-off-by: Sean Anderson Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 664dd409feb9..c29c3f174972 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -556,6 +556,9 @@ void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); unsigned long phylink_get_capabilities(phy_interface_t interface, unsigned long mac_capabilities, int rate_matching); +void phylink_validate_mask_caps(unsigned long *supported, + struct phylink_link_state *state, + unsigned long caps); void phylink_generic_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); -- cgit From 51c352bdbcd23d7ce46b06c1e64c82754dc44044 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 18 Oct 2022 15:37:27 +0100 Subject: netlink: add support for formatted extack messages Include an 80-byte buffer in struct netlink_ext_ack that can be used for scnprintf()ed messages. This does mean that the resulting string can't be enumerated, translated etc. in the way NL_SET_ERR_MSG() was designed to allow. 
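A hedged usage sketch of the formatted variant this patch adds; the driver name, helper and MTU limit below are invented for illustration:

	#define MYDRV_MAX_MTU	9000	/* made-up device limit */

	static int mydrv_validate_mtu(int new_mtu, struct netlink_ext_ack *extack)
	{
		if (new_mtu > MYDRV_MAX_MTU) {
			/* the offending value can now be reported inline */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "MTU %d exceeds device limit %d",
					       new_mtu, MYDRV_MAX_MTU);
			return -EINVAL;
		}
		return 0;
	}
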
Signed-off-by: Edward Cree Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d51e041d2242..d81bde5a5844 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -64,6 +64,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) /* this can be increased when necessary - don't expose to userland */ #define NETLINK_MAX_COOKIE_LEN 20 +#define NETLINK_MAX_FMTMSG_LEN 80 /** * struct netlink_ext_ack - netlink extended ACK report struct @@ -75,6 +76,8 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * @miss_nest: nest missing an attribute (%NULL if missing top level attr) * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length + * @_msg_buf: output buffer for formatted message strings - don't access + * directly, use %NL_SET_ERR_MSG_FMT */ struct netlink_ext_ack { const char *_msg; @@ -84,13 +87,13 @@ struct netlink_ext_ack { u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; + char _msg_buf[NETLINK_MAX_FMTMSG_LEN]; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. - * Currently string formatting is not supported (due - * to the lack of an output buffer.) + * If string formatting is needed use NL_SET_ERR_MSG_FMT. */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ @@ -102,9 +105,31 @@ struct netlink_ext_ack { __extack->_msg = __msg; \ } while (0) +/* We splice fmt with %s at each end even in the snprintf so that both calls + * can use the same string constant, avoiding its duplication in .ro + */ +#define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (!__extack) \ + break; \ + if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ + "%s" fmt "%s", "", ##args, "") >= \ + NETLINK_MAX_FMTMSG_LEN) \ + net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ + ##args, "\n"); \ + \ + do_trace_netlink_extack(__extack->_msg_buf); \ + \ + __extack->_msg = __extack->_msg_buf; \ +} while (0) + #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) +#define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ + NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) + #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ -- cgit From a5ef058dc4d9a3e60d1808a0700e18e0e37e408e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 20 Oct 2022 19:48:51 +0200 Subject: net: introduce and use custom sockopt socket flag We will soon introduce custom setsockopt for UDP sockets, too. Instead of doing even more complex arbitrary checks inside sock_use_custom_sol_socket(), add a new socket flag and set it for the relevant socket types (currently only MPTCP). Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Acked-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 711c3593c3b8..59350fd85823 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -41,6 +41,7 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 +#define SOCK_CUSTOM_SOCKOPT 5 #ifndef ARCH_HAS_SOCKET_TYPES /** -- cgit From 8a3854c7b8e4532063b14bed34115079b7d0cb36 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 20 Oct 2022 19:48:52 +0200 Subject: udp: track the forward memory release threshold in a hot cacheline When the receiver process and the BH run on different cores, udp_rmem_release() experiences a cache miss while accessing sk_rcvbuf, as the latter shares the same cacheline with sk_forward_alloc, written by the BH. With this patch, UDP tracks the rcvbuf value and its update via custom SOL_SOCKET socket options, and copies the forward memory threshold value used by udp_rmem_release() in a different cacheline, already accessed by the above function and uncontended. Since the UDP socket init operation has grown a bit, factor out the common code between v4 and v6 in a shared helper. Overall, the above gives a 10% peak throughput increase under UDP flood. Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Acked-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index e96da4157d04..5cdba00a904a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -87,6 +87,9 @@ struct udp_sock { /* This field is dirtied by udp_recvmsg() */ int forward_deficit; + + /* This field follows the rcvbuf value, and is touched by udp_recvmsg */ + int forward_threshold; }; #define UDP_MAX_SEGMENTS (1 << 6UL) -- cgit From 4727bab4e9bbeafeff6acdfcb077a7a548cbde30 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 21 Oct 2022 10:58:22 +0800 Subject: net: skb: move skb_pp_recycle() to skbuff.c skb_pp_recycle() is only used by skb_free_head() in skbuff.c, so move it to skbuff.c. Signed-off-by: Yunsheng Lin Acked-by: Ilias Apalodimas Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7be5bb4c94b6..59c9fd55699d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5050,12 +5050,5 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } #endif -static inline bool skb_pp_recycle(struct sk_buff *skb, void *data) -{ - if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) - return false; - return page_pool_return_skb_page(virt_to_page(data)); -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit From 398900498485fa9465386454681763d81efaad25 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 21 Oct 2022 16:09:59 +0100 Subject: net: sfp: provide a definition for the power level select bit Provide a named definition for the power level select bit in the extended status register, rather than using BIT(0) in the code.
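For illustration, a sketch of selecting the module's high power level using the new definition; sfp_write_a2() is an assumed accessor standing in for the driver's real A2h EEPROM write helper:

	u8 val = SFP_EXT_STATUS_PWRLVL_SELECT;
	int err;

	/* request high power level operation instead of open-coding BIT(0) */
	err = sfp_write_a2(sfp, SFP_EXT_STATUS, &val, sizeof(val));
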
Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index d1f343853b6c..01ae9f1dd2ad 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -489,6 +489,8 @@ enum { SFP_WARN1_RXPWR_LOW = BIT(6), SFP_EXT_STATUS = 0x76, + SFP_EXT_STATUS_PWRLVL_SELECT = BIT(0), + SFP_VSL = 0x78, SFP_PAGE = 0x7f, }; -- cgit From 73feb8d5fa3b755bb51077c0aabfb6aa556fd498 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Oct 2022 15:41:41 +0200 Subject: kallsyms: Make module_kallsyms_on_each_symbol generally available Making module_kallsyms_on_each_symbol generally available, so it can be used outside CONFIG_LIVEPATCH option in following changes. Rather than adding another ifdef option let's make the function generally available (when CONFIG_KALLSYMS and CONFIG_MODULES options are defined). Cc: Christoph Hellwig Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221025134148.3300700-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/module.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index ec61fb53979a..35876e89eb93 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -879,8 +879,17 @@ static inline bool module_sig_ok(struct module *module) } #endif /* CONFIG_MODULE_SIG */ +#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); +#else +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ -- cgit From b5f0de6df6dce8d641ef58ef7012f3304dffb9a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:56:03 -0700 Subject: net: dev: Convert sa_data to flexible array in struct sockaddr One of the worst offenders of "fake flexible arrays" is struct sockaddr, as it is the classic example of why GCC and Clang have been traditionally forced to treat all trailing arrays as fake flexible arrays: in the distant misty past, sa_data became too small, and code started just treating it as a flexible array, even though it was fixed-size. The special case by the compiler is specifically that sizeof(sa->sa_data) and FORTIFY_SOURCE (which uses __builtin_object_size(sa->sa_data, 1)) do not agree (14 and -1 respectively), which makes FORTIFY_SOURCE treat it as a flexible array. However, the coming -fstrict-flex-arrays compiler flag will remove these special cases so that FORTIFY_SOURCE can gain coverage over all the trailing arrays in the kernel that are _not_ supposed to be treated as a flexible array. To deal with this change, convert sa_data to a true flexible array. To keep the structure size the same, move sa_data into a union with a newly introduced sa_data_min with the original size. The result is that FORTIFY_SOURCE can continue to have no idea how large sa_data may actually be, but anything using sizeof(sa->sa_data) must switch to sizeof(sa->sa_data_min). 
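A small sketch of what callers look like after the conversion; both union members alias the same 14 bytes of storage, so only the sizeof() expression changes:

	struct sockaddr sa = { .sa_family = AF_UNSPEC };

	/* was: memset(sa.sa_data, 0, sizeof(sa.sa_data)); */
	memset(sa.sa_data, 0, sizeof(sa.sa_data_min));

	/* the fixed-size view keeps the historical layout */
	BUILD_BUG_ON(sizeof(sa.sa_data_min) != 14);
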
Cc: Jens Axboe Cc: Pavel Begunkov Cc: David Ahern Cc: Dylan Yudaken Cc: Yajun Deng Cc: Petr Machata Cc: Hangbin Liu Cc: Leon Romanovsky Cc: syzbot Cc: Willem de Bruijn Cc: Pablo Neira Ayuso Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018095503.never.671-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index de3701a2a212..13c3a237b9c9 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { -- cgit From 271de525e1d7f564e88a9d212c50998b49a54476 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:16 -0700 Subject: bpf: Remove prog->active check for bpf_lsm and bpf_iter The commit 64696c40d03c ("bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline") removed the prog->active check for struct_ops progs. The bpf_lsm and bpf_iter programs also use trampolines. Like struct_ops, the bpf_lsm and bpf_iter have fixed hooks for the prog to attach to. The kernel does not call the same hook in a recursive way. This patch also removes the prog->active check for bpf_lsm and bpf_iter. A later patch has a test to reproduce the recursion issue for a sleepable bpf_lsm program. This patch appends the '_recur' naming to the existing enter and exit functions that track the prog->active counter. New __bpf_prog_{enter,exit}[_sleepable] functions are added to skip the prog->active tracking. The '_struct_ops' version is also removed. It also moves the decision on picking the enter and exit function to the new bpf_trampoline_{enter,exit}(). It returns the '_recur' ones for all tracing progs to use. For bpf_lsm, bpf_iter, struct_ops (no prog->active tracking after 64696c40d03c), and bpf_lsm_cgroup (no prog->active tracking after 69fd337a975c7), it will return the functions that don't track the prog->active.
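A sketch of the selection logic this describes; only the _sleepable_recur pair is declared in the header hunk below, so the remaining helper names here are assumptions for illustration:

	bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
	{
		bool sleepable = prog->aux->sleepable;

		/* tracing progs keep the prog->active recursion counter */
		if (bpf_prog_check_recur(prog))
			return sleepable ? __bpf_prog_enter_sleepable_recur :
					   __bpf_prog_enter_recur;

		/* bpf_lsm, bpf_iter, struct_ops, bpf_lsm_cgroup */
		return sleepable ? __bpf_prog_enter_sleepable :
				   __bpf_prog_enter;
	}
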
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 24 ++++++++++-------------- include/linux/bpf_verifier.h | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9e7d46d16032..1279e699dc98 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -854,22 +854,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call); -/* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); +typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); +bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); struct bpf_ksym { unsigned long start; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e1e6965f407..1a32baa78ce2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -642,10 +642,23 @@ static inline u32 type_flag(u32 type) } /* only use after check_attach_btf_id() */ -static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_EXT ? prog->aux->dst_prog->type : prog->type; } +static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) +{ + switch (resolve_prog_type(prog)) { + case BPF_PROG_TYPE_TRACING: + return prog->expected_attach_type != BPF_TRACE_ITER; + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + return false; + default: + return true; + } +} + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit From 0593dd34e53489557569d5e6d27371b49aa9b41f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:17 -0700 Subject: bpf: Append _recur naming to the bpf_task_storage helper proto This patch adds the "_recur" naming to the bpf_task_storage_{get,delete} proto. 
In a later patch, they will only be used by the tracing programs that require deadlock detection, because a tracing prog may use bpf_task_storage_{get,delete} recursively and cause a deadlock. Another following patch will add a different helper proto for the non-tracing programs, because they do not need the deadlock prevention. This patch does the rename to prepare for these future proto additions. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-3-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1279e699dc98..b04fe3f4342e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2519,8 +2519,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; -extern const struct bpf_func_proto bpf_task_storage_get_proto; -extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; -- cgit From 4279adb094a17132423f1271c3d11b593fc2327e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:20 -0700 Subject: bpf: Add new bpf_task_storage_get proto with no deadlock detection The bpf_lsm and bpf_iter programs do not recur in a way that would cause a deadlock. The situation is similar to bpf_pid_task_storage_lookup_elem(), which is called from the syscall map_lookup_elem. It does not need deadlock detection. Otherwise, it will cause unnecessary failure when calling the bpf_task_storage_get() helper. This patch adds a bpf_task_storage_get proto that does not do deadlock detection. It will be used by bpf_lsm and bpf_iter programs. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-6-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b04fe3f4342e..ef3f98afa45d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2520,6 +2520,7 @@ extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; -- cgit From 8a7dac37f27a3dfbd814bf29a73d6417db2c81d9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:22 -0700 Subject: bpf: Add new bpf_task_storage_delete proto with no deadlock detection The bpf_lsm and bpf_iter programs do not recur in a way that would cause a deadlock. The situation is similar to bpf_pid_task_storage_delete_elem(), which is called from the syscall map_delete_elem.
It does not need deadlock detection. Otherwise, it will cause unnecessary failure when calling the bpf_task_storage_delete() helper. This patch adds a bpf_task_storage_delete proto that does not do deadlock detection. It will be used by bpf_lsm and bpf_iter programs. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-8-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ef3f98afa45d..a5dbac8f5aba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2522,6 +2522,7 @@ extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; -- cgit From 5e67b8ef125bb6e83bf0f0442ad7ffc09e7956f9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:40 -0700 Subject: bpf: Make struct cgroup btf id global Make struct cgroup btf id global so a later patch can reuse the same btf id. Acked-by: David Vernet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042840.672602-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 2aea877d644f..c9744efd202f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -265,5 +265,6 @@ MAX_BTF_TRACING_TYPE, }; extern u32 btf_tracing_ids[]; +extern u32 bpf_cgroup_btf_id[]; #endif -- cgit From c83597fa5dc6b322e9bdf929e5f4136a3f4aa4db Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:45 -0700 Subject: bpf: Refactor some inode/task/sk storage functions for reuse Refactor the code so that the inode/task/sk storage implementations can maximally share the same code. I also added some comments in the new function bpf_local_storage_unlink_nolock() to make the code easy to understand. There is no functionality change.
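To show the reuse this enables, a sketch of how one storage family's map ops can now delegate to the shared code; it mirrors the refactored prototypes in the header hunk below, with the sk flavour used as the example:

	DEFINE_BPF_STORAGE_CACHE(sk_cache);

	static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
	{
		/* the shared code picks a cache slot and sizes the map */
		return bpf_local_storage_map_alloc(attr, &sk_cache);
	}

	static void bpf_sk_storage_map_free(struct bpf_map *map)
	{
		bpf_local_storage_map_free(map, &sk_cache, NULL);
	}
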
Acked-by: David Vernet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042845.672944-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 7ea18d4da84b..6d37a40cd90e 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = { \ .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \ } -u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); -void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, - u16 idx); - /* Helper functions for bpf_local_storage */ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); -struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); +struct bpf_map * +bpf_local_storage_map_alloc(union bpf_attr *attr, + struct bpf_local_storage_cache *cache); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); +bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); + +void bpf_local_storage_map_free(struct bpf_map *map, + struct bpf_local_storage_cache *cache, int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct bpf_map *map, @@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_omem, bool use_trace_rcu); - void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, -- cgit From c4bcfb38a95edb1021a53f2d0356a78120ecfbe4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:50 -0700 Subject: bpf: Implement cgroup storage available to non-cgroup-attached bpf progs Similar to sk/inode/task storage, implement cgroup local storage. There already exists a local storage implementation for cgroup-attached bpf programs. See map type BPF_MAP_TYPE_CGROUP_STORAGE and helper bpf_get_local_storage(). But there are use cases where non-cgroup-attached bpf progs want to access cgroup local storage data. For example, tc egress prog has access to sk and cgroup. It is possible to use sk local storage to emulate cgroup local storage by storing data in the socket. But this is a waste as there could be lots of sockets belonging to a particular cgroup. Alternatively, a separate map can be created with cgroup id as the key. But this will introduce additional overhead to manipulate the new map. A cgroup local storage, similar to existing sk/inode/task storage, should help for this use case. The life-cycle of storage is managed with the life-cycle of the cgroup struct, i.e. the storage is destroyed along with the owning cgroup with a call to bpf_cgrp_storage_free() when the cgroup itself is deleted. The userspace map operations can be done by using a cgroup fd as a key passed to the lookup, update and delete operations.
Typically, the following code is used to get the current cgroup: struct task_struct *task = bpf_get_current_task_btf(); ... task->cgroups->dfl_cgrp ... and in structure task_struct definition: struct task_struct { .... struct css_set __rcu *cgroups; .... } With sleepable program, accessing task->cgroups is not protected by rcu_read_lock. So the current implementation only supports non-sleepable program and supporting sleepable program will be the next step together with adding rcu_read_lock protection for rcu tagged structures. Since map name BPF_MAP_TYPE_CGROUP_STORAGE has been used for old cgroup local storage support, the new map name BPF_MAP_TYPE_CGRP_STORAGE is used for cgroup storage available to non-cgroup-attached bpf programs. The old cgroup storage supports bpf_get_local_storage() helper to get the cgroup data. The new cgroup storage helper bpf_cgrp_storage_get() can provide similar functionality. While old cgroup storage pre-allocates storage memory, the new mechanism can also pre-allocate with a user space bpf_map_update_elem() call to avoid potential run-time memory allocation failure. Therefore, the new cgroup storage can provide all functionality w.r.t. the old one. So in uapi bpf.h, the old BPF_MAP_TYPE_CGROUP_STORAGE is alias to BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED to indicate the old cgroup storage can be deprecated since the new one can provide the same functionality. Acked-by: David Vernet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042850.673791-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_types.h | 1 + include/linux/cgroup-defs.h | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a5dbac8f5aba..9fd68b0b3e9c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2041,6 +2041,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, @@ -2295,6 +2296,10 @@ static inline bool has_current_bpf_ctx(void) static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } + +static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2535,6 +2540,8 @@ extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_get_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2c6a4f2562a7..d4ee3ccd3753 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops) #endif #ifdef 
CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8f481d1b159a..c466fdc3a32a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -504,6 +504,10 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *bpf_cgrp_storage; +#endif + /* All ancestors including self */ struct cgroup *ancestors[]; }; -- cgit From e753df8fbca592d36f539ed950fcdf412166c549 Mon Sep 17 00:00:00 2001 From: Michal Jaron Date: Tue, 25 Oct 2022 09:12:52 -0700 Subject: ice: Add support Flex RXD Add new VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC flag, opcode VIRTCHNL_OP_GET_SUPPORTED_RXDIDS and add member rxdid in struct virtchnl_rxq_info to support AVF Flex RXD extension. Add support to allow VF to query flexible descriptor RXDIDs supported by DDP package and configure Rx queues with selected RXDID for IAVF. Add code to allow VIRTCHNL_OP_GET_SUPPORTED_RXDIDS message to be processed. Add necessary macros for registers. Signed-off-by: Leyi Rong Signed-off-by: Xu Ting Signed-off-by: Michal Jaron Signed-off-by: Mateusz Palczewski Tested-by: Maxime Coquelin Tested-by: Konrad Jankowski Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20221025161252.1952939-1-jacob.e.keller@intel.com Signed-off-by: Paolo Abeni --- include/linux/avf/virtchnl.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2ce27e8e4f19..d91af50ac58d 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,7 +136,8 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, - /* opcode 34 - 44 are reserved */ + /* opcode 34 - 43 are reserved */ + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS = 44, VIRTCHNL_OP_ADD_RSS_CFG = 45, VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, @@ -263,6 +264,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM BIT(22) #define VIRTCHNL_VF_OFFLOAD_ADQ BIT(23) #define VIRTCHNL_VF_OFFLOAD_USO BIT(25) +#define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) @@ -318,7 +320,9 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u32 pad1; + u8 pad0; + u8 rxdid; + u8 pad1[2]; u64 dma_ring_addr; enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ u32 pad2; @@ -970,6 +974,10 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); +struct virtchnl_supported_rxdids { + u64 supported_rxdids; +}; + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. 
* No direct response is expected from the VF, though it may generate other @@ -1499,6 +1507,8 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + break; case VIRTCHNL_OP_ADD_RSS_CFG: case VIRTCHNL_OP_DEL_RSS_CFG: valid_len = sizeof(struct virtchnl_rss_cfg); -- cgit From 29c1c44646aec5d5134f2365259a84becc1ee7d3 Mon Sep 17 00:00:00 2001 From: Mubashir Adnan Qureshi Date: Wed, 26 Oct 2022 13:51:14 +0000 Subject: tcp: add u32 counter in tcp_sock and an SNMP counter for PLB A u32 counter is added to tcp_sock for counting the number of PLB triggered rehashes for a TCP connection. An SNMP counter is also added to count overall PLB triggered rehash events for a host. These counters are hooked up to PLB implementation for DCTCP. TCP_NLA_REHASH is added to SCM_TIMESTAMPING_OPT_STATS that reports the rehash attempts triggered due to PLB or timeouts. This gives a historical view of sustained congestion or timeouts experienced by the TCP connection. Signed-off-by: Mubashir Adnan Qureshi Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 41b1da621a45..ca7f05a130d2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -423,6 +423,7 @@ struct tcp_sock { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; + u32 plb_rehash; /* PLB-triggered rehash attempts */ u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG * while socket was owned by user. */ -- cgit From 9c5a170677c3c8facc83e931a57f4c99c0511ae0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 27 Oct 2022 14:10:37 +0100 Subject: net: phylink: add phylink_get_link_timer_ns() helper Add a helper to convert the PHY interface mode to the required link timer setting as stated by the appropriate standard. Inappropriate interface modes return an error. Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 63800bf4a7ac..1df3e5e316e8 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -616,6 +616,30 @@ int phylink_speed_up(struct phylink *pl); void phylink_set_port_modes(unsigned long *bits); +/** + * phylink_get_link_timer_ns - return the PCS link timer value + * @interface: link &typedef phy_interface_t mode + * + * Return the PCS link timer setting in nanoseconds for the PHY @interface + * mode, or -EINVAL if not appropriate. 
+ */ +static inline int phylink_get_link_timer_ns(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_USXGMII: + return 1600000; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return 10000000; + + default: + return -EINVAL; + } +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, -- cgit From 17dd361119e5bc4cfb85aed660674a846b613817 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 27 Oct 2022 14:21:16 +0100 Subject: net: sfp: convert register indexes from hex to decimal The register indexes in the standards are in decimal rather than hex, so lets specify them in decimal in the header file so we can easily cross-reference without converting between hex and decimal. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 180 ++++++++++++++++++++++++++-------------------------- 1 file changed, 90 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 01ae9f1dd2ad..4e2db155664d 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -332,37 +332,37 @@ enum { /* SFP EEPROM registers */ enum { - SFP_PHYS_ID = 0x00, - SFP_PHYS_EXT_ID = 0x01, - SFP_CONNECTOR = 0x02, - SFP_COMPLIANCE = 0x03, - SFP_ENCODING = 0x0b, - SFP_BR_NOMINAL = 0x0c, - SFP_RATE_ID = 0x0d, - SFP_LINK_LEN_SM_KM = 0x0e, - SFP_LINK_LEN_SM_100M = 0x0f, - SFP_LINK_LEN_50UM_OM2_10M = 0x10, - SFP_LINK_LEN_62_5UM_OM1_10M = 0x11, - SFP_LINK_LEN_COPPER_1M = 0x12, - SFP_LINK_LEN_50UM_OM4_10M = 0x12, - SFP_LINK_LEN_50UM_OM3_10M = 0x13, - SFP_VENDOR_NAME = 0x14, - SFP_VENDOR_OUI = 0x25, - SFP_VENDOR_PN = 0x28, - SFP_VENDOR_REV = 0x38, - SFP_OPTICAL_WAVELENGTH_MSB = 0x3c, - SFP_OPTICAL_WAVELENGTH_LSB = 0x3d, - SFP_CABLE_SPEC = 0x3c, - SFP_CC_BASE = 0x3f, - SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */ - SFP_BR_MAX = 0x42, - SFP_BR_MIN = 0x43, - SFP_VENDOR_SN = 0x44, - SFP_DATECODE = 0x54, - SFP_DIAGMON = 0x5c, - SFP_ENHOPTS = 0x5d, - SFP_SFF8472_COMPLIANCE = 0x5e, - SFP_CC_EXT = 0x5f, + SFP_PHYS_ID = 0, + SFP_PHYS_EXT_ID = 1, + SFP_CONNECTOR = 2, + SFP_COMPLIANCE = 3, + SFP_ENCODING = 11, + SFP_BR_NOMINAL = 12, + SFP_RATE_ID = 13, + SFP_LINK_LEN_SM_KM = 14, + SFP_LINK_LEN_SM_100M = 15, + SFP_LINK_LEN_50UM_OM2_10M = 16, + SFP_LINK_LEN_62_5UM_OM1_10M = 17, + SFP_LINK_LEN_COPPER_1M = 18, + SFP_LINK_LEN_50UM_OM4_10M = 18, + SFP_LINK_LEN_50UM_OM3_10M = 19, + SFP_VENDOR_NAME = 20, + SFP_VENDOR_OUI = 37, + SFP_VENDOR_PN = 40, + SFP_VENDOR_REV = 56, + SFP_OPTICAL_WAVELENGTH_MSB = 60, + SFP_OPTICAL_WAVELENGTH_LSB = 61, + SFP_CABLE_SPEC = 60, + SFP_CC_BASE = 63, + SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */ + SFP_BR_MAX = 66, + SFP_BR_MIN = 67, + SFP_VENDOR_SN = 68, + SFP_DATECODE = 84, + SFP_DIAGMON = 92, + SFP_ENHOPTS = 93, + SFP_SFF8472_COMPLIANCE = 94, + SFP_CC_EXT = 95, SFP_PHYS_EXT_ID_SFP = 0x04, SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), @@ -404,63 +404,63 @@ enum { /* SFP Diagnostics */ enum { /* Alarm and warnings stored MSB at lower address then LSB */ - SFP_TEMP_HIGH_ALARM = 0x00, - SFP_TEMP_LOW_ALARM = 0x02, - SFP_TEMP_HIGH_WARN = 0x04, - SFP_TEMP_LOW_WARN = 0x06, - SFP_VOLT_HIGH_ALARM = 0x08, - SFP_VOLT_LOW_ALARM = 0x0a, - SFP_VOLT_HIGH_WARN = 0x0c, - SFP_VOLT_LOW_WARN = 0x0e, - SFP_BIAS_HIGH_ALARM = 0x10, - SFP_BIAS_LOW_ALARM = 0x12, - SFP_BIAS_HIGH_WARN = 0x14, - 
SFP_BIAS_LOW_WARN = 0x16, - SFP_TXPWR_HIGH_ALARM = 0x18, - SFP_TXPWR_LOW_ALARM = 0x1a, - SFP_TXPWR_HIGH_WARN = 0x1c, - SFP_TXPWR_LOW_WARN = 0x1e, - SFP_RXPWR_HIGH_ALARM = 0x20, - SFP_RXPWR_LOW_ALARM = 0x22, - SFP_RXPWR_HIGH_WARN = 0x24, - SFP_RXPWR_LOW_WARN = 0x26, - SFP_LASER_TEMP_HIGH_ALARM = 0x28, - SFP_LASER_TEMP_LOW_ALARM = 0x2a, - SFP_LASER_TEMP_HIGH_WARN = 0x2c, - SFP_LASER_TEMP_LOW_WARN = 0x2e, - SFP_TEC_CUR_HIGH_ALARM = 0x30, - SFP_TEC_CUR_LOW_ALARM = 0x32, - SFP_TEC_CUR_HIGH_WARN = 0x34, - SFP_TEC_CUR_LOW_WARN = 0x36, - SFP_CAL_RXPWR4 = 0x38, - SFP_CAL_RXPWR3 = 0x3c, - SFP_CAL_RXPWR2 = 0x40, - SFP_CAL_RXPWR1 = 0x44, - SFP_CAL_RXPWR0 = 0x48, - SFP_CAL_TXI_SLOPE = 0x4c, - SFP_CAL_TXI_OFFSET = 0x4e, - SFP_CAL_TXPWR_SLOPE = 0x50, - SFP_CAL_TXPWR_OFFSET = 0x52, - SFP_CAL_T_SLOPE = 0x54, - SFP_CAL_T_OFFSET = 0x56, - SFP_CAL_V_SLOPE = 0x58, - SFP_CAL_V_OFFSET = 0x5a, - SFP_CHKSUM = 0x5f, - - SFP_TEMP = 0x60, - SFP_VCC = 0x62, - SFP_TX_BIAS = 0x64, - SFP_TX_POWER = 0x66, - SFP_RX_POWER = 0x68, - SFP_LASER_TEMP = 0x6a, - SFP_TEC_CUR = 0x6c, - - SFP_STATUS = 0x6e, + SFP_TEMP_HIGH_ALARM = 0, + SFP_TEMP_LOW_ALARM = 2, + SFP_TEMP_HIGH_WARN = 4, + SFP_TEMP_LOW_WARN = 6, + SFP_VOLT_HIGH_ALARM = 8, + SFP_VOLT_LOW_ALARM = 10, + SFP_VOLT_HIGH_WARN = 12, + SFP_VOLT_LOW_WARN = 14, + SFP_BIAS_HIGH_ALARM = 16, + SFP_BIAS_LOW_ALARM = 18, + SFP_BIAS_HIGH_WARN = 20, + SFP_BIAS_LOW_WARN = 22, + SFP_TXPWR_HIGH_ALARM = 24, + SFP_TXPWR_LOW_ALARM = 26, + SFP_TXPWR_HIGH_WARN = 28, + SFP_TXPWR_LOW_WARN = 30, + SFP_RXPWR_HIGH_ALARM = 32, + SFP_RXPWR_LOW_ALARM = 34, + SFP_RXPWR_HIGH_WARN = 36, + SFP_RXPWR_LOW_WARN = 38, + SFP_LASER_TEMP_HIGH_ALARM = 40, + SFP_LASER_TEMP_LOW_ALARM = 42, + SFP_LASER_TEMP_HIGH_WARN = 44, + SFP_LASER_TEMP_LOW_WARN = 46, + SFP_TEC_CUR_HIGH_ALARM = 48, + SFP_TEC_CUR_LOW_ALARM = 50, + SFP_TEC_CUR_HIGH_WARN = 52, + SFP_TEC_CUR_LOW_WARN = 54, + SFP_CAL_RXPWR4 = 56, + SFP_CAL_RXPWR3 = 60, + SFP_CAL_RXPWR2 = 64, + SFP_CAL_RXPWR1 = 68, + SFP_CAL_RXPWR0 = 72, + SFP_CAL_TXI_SLOPE = 76, + SFP_CAL_TXI_OFFSET = 78, + SFP_CAL_TXPWR_SLOPE = 80, + SFP_CAL_TXPWR_OFFSET = 82, + SFP_CAL_T_SLOPE = 84, + SFP_CAL_T_OFFSET = 86, + SFP_CAL_V_SLOPE = 88, + SFP_CAL_V_OFFSET = 90, + SFP_CHKSUM = 95, + + SFP_TEMP = 96, + SFP_VCC = 98, + SFP_TX_BIAS = 100, + SFP_TX_POWER = 102, + SFP_RX_POWER = 104, + SFP_LASER_TEMP = 106, + SFP_TEC_CUR = 108, + + SFP_STATUS = 110, SFP_STATUS_TX_DISABLE = BIT(7), SFP_STATUS_TX_DISABLE_FORCE = BIT(6), SFP_STATUS_TX_FAULT = BIT(2), SFP_STATUS_RX_LOS = BIT(1), - SFP_ALARM0 = 0x70, + SFP_ALARM0 = 112, SFP_ALARM0_TEMP_HIGH = BIT(7), SFP_ALARM0_TEMP_LOW = BIT(6), SFP_ALARM0_VCC_HIGH = BIT(5), @@ -470,11 +470,11 @@ enum { SFP_ALARM0_TXPWR_HIGH = BIT(1), SFP_ALARM0_TXPWR_LOW = BIT(0), - SFP_ALARM1 = 0x71, + SFP_ALARM1 = 113, SFP_ALARM1_RXPWR_HIGH = BIT(7), SFP_ALARM1_RXPWR_LOW = BIT(6), - SFP_WARN0 = 0x74, + SFP_WARN0 = 116, SFP_WARN0_TEMP_HIGH = BIT(7), SFP_WARN0_TEMP_LOW = BIT(6), SFP_WARN0_VCC_HIGH = BIT(5), @@ -484,15 +484,15 @@ enum { SFP_WARN0_TXPWR_HIGH = BIT(1), SFP_WARN0_TXPWR_LOW = BIT(0), - SFP_WARN1 = 0x75, + SFP_WARN1 = 117, SFP_WARN1_RXPWR_HIGH = BIT(7), SFP_WARN1_RXPWR_LOW = BIT(6), - SFP_EXT_STATUS = 0x76, + SFP_EXT_STATUS = 118, SFP_EXT_STATUS_PWRLVL_SELECT = BIT(0), - SFP_VSL = 0x78, - SFP_PAGE = 0x7f, + SFP_VSL = 120, + SFP_PAGE = 127, }; struct fwnode_handle; -- cgit From d83845d224a059165902ecd0e1203015c5713a78 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 27 Oct 2022 14:21:21 +0100 Subject: net: sfp: move field definitions along side register 
index Just as we do for the A2h enum, arrange the A0h enum to have the field definitions next to their corresponding register index. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 4e2db155664d..52b98f9666a2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -333,7 +333,10 @@ enum { /* SFP EEPROM registers */ enum { SFP_PHYS_ID = 0, + SFP_PHYS_EXT_ID = 1, + SFP_PHYS_EXT_ID_SFP = 0x04, + SFP_CONNECTOR = 2, SFP_COMPLIANCE = 3, SFP_ENCODING = 11, @@ -354,17 +357,8 @@ enum { SFP_OPTICAL_WAVELENGTH_LSB = 61, SFP_CABLE_SPEC = 60, SFP_CC_BASE = 63, - SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */ - SFP_BR_MAX = 66, - SFP_BR_MIN = 67, - SFP_VENDOR_SN = 68, - SFP_DATECODE = 84, - SFP_DIAGMON = 92, - SFP_ENHOPTS = 93, - SFP_SFF8472_COMPLIANCE = 94, - SFP_CC_EXT = 95, - SFP_PHYS_EXT_ID_SFP = 0x04, + SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */ SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), SFP_OPTIONS_RETIMER = BIT(11), @@ -378,11 +372,20 @@ enum { SFP_OPTIONS_TX_FAULT = BIT(3), SFP_OPTIONS_LOS_INVERTED = BIT(2), SFP_OPTIONS_LOS_NORMAL = BIT(1), + + SFP_BR_MAX = 66, + SFP_BR_MIN = 67, + SFP_VENDOR_SN = 68, + SFP_DATECODE = 84, + + SFP_DIAGMON = 92, SFP_DIAGMON_DDM = BIT(6), SFP_DIAGMON_INT_CAL = BIT(5), SFP_DIAGMON_EXT_CAL = BIT(4), SFP_DIAGMON_RXPWR_AVG = BIT(3), SFP_DIAGMON_ADDRMODE = BIT(2), + + SFP_ENHOPTS = 93, SFP_ENHOPTS_ALARMWARN = BIT(7), SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6), SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5), @@ -390,6 +393,8 @@ enum { SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3), SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2), SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1), + + SFP_SFF8472_COMPLIANCE = 94, SFP_SFF8472_COMPLIANCE_NONE = 0x00, SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01, SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02, @@ -399,6 +404,8 @@ enum { SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06, SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07, SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08, + + SFP_CC_EXT = 95, }; /* SFP Diagnostics */ -- cgit From f3fb589aeb88c5bf7a7a804d36a8fa219b72e290 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Fri, 28 Oct 2022 01:04:24 +0900 Subject: net: remove unused netdev_unregistering() Currently, dev->reg_state == NETREG_UNREGISTERING is used directly to check for the NETREG_UNREGISTERING state, rather than the netdev_unregistering() helper. The netdev_unregistering() helper in netdevice.h is therefore no longer used, so remove it from netdevice.h. Signed-off-by: Juhee Kang Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eddf8ee270e7..99e58b773266 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5101,11 +5101,6 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } -static inline bool netdev_unregistering(const struct net_device *dev) -{ - return dev->reg_state == NETREG_UNREGISTERING; -} - static inline const char *netdev_reg_state(const struct net_device *dev) { switch (dev->reg_state) { -- cgit From b9a61b97798ca6d0c856a217e61e2177bd8f6eae Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 28 Oct 2022 04:04:12 -0700 Subject: ptp: add missing documentation for parameters The ptp_find_pin_unlocked function and the ptp_system_timestamp structure didn't document their parameters and fields. Fix this. Signed-off-by: Jacob Keller Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 92b44161408e..ad4aaadc2f7a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -45,6 +45,8 @@ struct system_device_crosststamp; /** * struct ptp_system_timestamp - system time corresponding to a PHC timestamp + * @pre_ts: system timestamp before capturing PHC + * @post_ts: system timestamp after capturing PHC */ struct ptp_system_timestamp { struct timespec64 pre_ts; @@ -316,6 +318,11 @@ int ptp_find_pin(struct ptp_clock *ptp, * should most likely call ptp_find_pin() directly from their * ptp_clock_info::enable() method. * +* @ptp: The clock obtained from ptp_clock_register(). +* @func: One of the ptp_pin_function enumerated values. +* @chan: The particular functional channel to find. +* Return: Pin index in the range of zero to ptp_clock_caps.n_pins - 1, +* or -1 if the auxiliary function cannot be found. */ int ptp_find_pin_unlocked(struct ptp_clock *ptp, -- cgit From 1060707e380994e7c9b754b6eb74f25459b4a5b3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 28 Oct 2022 04:04:13 -0700 Subject: ptp: introduce helpers to adjust by scaled parts per million Many drivers implement the .adjfreq or .adjfine PTP op function with the same basic logic: 1. Determine a base frequency value 2. Multiply this by the abs() of the requested adjustment, then divide by the appropriate divisor (1 billion, or 65,536 billion). 3. Add or subtract this difference from the base frequency to calculate a new adjustment. A few drivers need the difference and direction rather than the combined new increment value. I recently converted the Intel drivers to .adjfine and the scaled parts per million (65.536 parts per billion) logic. To avoid overflow with minimal loss of precision, mul_u64_u64_div_u64 was used. The basic logic used by all of these drivers is very similar, and leads to a lot of duplicate code to perform the same task. Rather than keep this duplicate code, introduce diff_by_scaled_ppm and adjust_by_scaled_ppm. These helper functions calculate the difference or adjustment necessary based on the scaled parts per million input. The diff_by_scaled_ppm function returns true if the difference should be subtracted, and false otherwise. Update the Intel drivers to use the new helper functions. 
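To make the arithmetic concrete, here is a minimal user-space sketch of the same calculation (illustrative only, not part of the patch; the base increment value is made up and plain 64-bit math stands in for mul_u64_u64_div_u64()):

#include <stdint.h>
#include <stdio.h>

/* Same math as adjust_by_scaled_ppm(): scale the base increment by
 * |scaled_ppm| / (10^6 * 2^16), then add or subtract the delta. */
static uint64_t sketch_adjust_by_scaled_ppm(uint64_t base, long scaled_ppm)
{
	int negative = scaled_ppm < 0;
	uint64_t diff;

	if (negative)
		scaled_ppm = -scaled_ppm;
	/* the kernel uses mul_u64_u64_div_u64() here to survive the
	 * 128-bit intermediate product; u64 math suffices for a demo */
	diff = base * (uint64_t)scaled_ppm / (1000000ULL << 16);
	return negative ? base - diff : base + diff;
}

int main(void)
{
	uint64_t base = 1ULL << 31;	/* hypothetical base increment */

	/* +1 ppm == +65536 scaled ppm -> base grows by ~2147 */
	printf("%llu\n",
	       (unsigned long long)sketch_adjust_by_scaled_ppm(base, 65536));
	return 0;
}

Here 65536 scaled ppm equals 1 ppm, so the 2^31 base increment grows by roughly 2147 units, matching 1e-6 * 2^31.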
Other vendor drivers will be converted to .adjfine and this helper function in the following changes. Signed-off-by: Jacob Keller Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index ad4aaadc2f7a..f4781c5766d6 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -248,6 +248,52 @@ static inline long scaled_ppm_to_ppb(long ppm) return (long)ppb; } +/** + * diff_by_scaled_ppm - Calculate difference using scaled ppm + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million to adjust by + * @diff: on return, the absolute value of calculated diff + * + * Calculate the difference to adjust the base increment using scaled parts + * per million. + * + * Use mul_u64_u64_div_u64 to perform the difference calculation to avoid + * possible overflow. + * + * Returns: true if scaled_ppm is negative, false otherwise + */ +static inline bool diff_by_scaled_ppm(u64 base, long scaled_ppm, u64 *diff) +{ + bool negative = false; + + if (scaled_ppm < 0) { + negative = true; + scaled_ppm = -scaled_ppm; + } + + *diff = mul_u64_u64_div_u64(base, (u64)scaled_ppm, 1000000ULL << 16); + + return negative; +} + +/** + * adjust_by_scaled_ppm - Adjust a base increment by scaled parts per million + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million frequency adjustment + * + * Helper function which calculates a new increment value based on the + * requested scaled parts per million adjustment. + */ +static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) +{ + u64 diff; + + if (diff_by_scaled_ppm(base, scaled_ppm, &diff)) + return base - diff; + + return base + diff; +} + #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) /** -- cgit From 1d997f1013079c05b642c739901e3584a3ae558d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 28 Oct 2022 04:42:21 -0400 Subject: rtnetlink: pass netlink message header and portid to rtnl_configure_link() This patch passes the netlink message header and portid to rtnl_configure_link(). All the functions in this call chain need to take the additional parameters so we can use them in the last call rtnl_notify(), and notify userspace about the new link info if the NLM_F_ECHO flag is set. - rtnl_configure_link() - __dev_notify_flags() - rtmsg_ifinfo() - rtmsg_ifinfo_event() - rtmsg_ifinfo_build_skb() - rtmsg_ifinfo_send() - rtnl_notify() Also move __dev_notify_flags() declaration to net/core/dev.h, as Jakub suggested.
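To illustrate the user-visible effect (an editorial sketch, not part of the patch): a process that sets NLM_F_ECHO on its request should now receive the resulting RTM_NEWLINK notification on the requesting socket, without subscribing to the RTNLGRP_LINK group. The ifindex below is hypothetical and error handling is omitted:

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
	} req = {
		.nlh = {
			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
			.nlmsg_type = RTM_SETLINK,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO,
			.nlmsg_seq = 1,
		},
		.ifm = {
			.ifi_family = AF_UNSPEC,
			.ifi_index = 2,		/* hypothetical ifindex */
			.ifi_flags = IFF_UP,
			.ifi_change = IFF_UP,	/* only touch the UP bit */
		},
	};
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *)&kernel, sizeof(kernel));
	/* with NLM_F_ECHO the kernel should echo the RTM_NEWLINK
	 * notification for the changed link back to this socket,
	 * in addition to the usual ACK */
	recv(fd, buf, sizeof(buf), 0);
	close(fd);
	return 0;
}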
Signed-off-by: Hangbin Liu Reviewed-by: Guillaume Nault Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- include/linux/rtnetlink.h | 9 +++++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99e58b773266..4b5052db978f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3855,8 +3855,6 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); -void __dev_notify_flags(struct net_device *, unsigned int old_flags, - unsigned int gchanges); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ae2c6a3cec5d..92ad75549e9c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -12,21 +12,22 @@ extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, - u32 group, struct nlmsghdr *nlh, gfp_t flags); + u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, + u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex); + int new_ifindex, u32 portid, u32 seq); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, - gfp_t flags); + gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ -- cgit From d08b0f8f46e45a274fc8c9a5bc92cb9da70d9887 Mon Sep 17 00:00:00 2001 From: Shane Parslow Date: Sat, 29 Oct 2022 02:03:56 -0700 Subject: net: wwan: iosm: add rpc interface for xmm modems Add a new iosm wwan port that connects to the modem rpc interface. This interface provides a configuration channel, and in the case of the 7360, is the only way to configure the modem (as it does not support mbim). The new interface is compatible with existing software, such as open_xdatachannel.py from the xmm7360-pci project [1]. [1] https://github.com/xmm7360/xmm7360-pci Signed-off-by: Shane Parslow Reviewed-by: Loic Poulain Signed-off-by: David S. 
Miller --- include/linux/wwan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 5ce2acf444fb..24d76500b1cc 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -15,6 +15,7 @@ * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol + * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -26,6 +27,7 @@ enum wwan_port_type { WWAN_PORT_QMI, WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, + WWAN_PORT_XMMRPC, /* Add new port types above this line */ -- cgit From 23da464dd6b8935b66f4ee306ad8947fd32ccd75 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:51 +0530 Subject: bpf: Allow specifying volatile type modifier for kptrs This is useful in particular to mark the pointer as volatile, so that compiler treats each load and store to the field as a volatile access. The alternative is having to define and use READ_ONCE and WRITE_ONCE in the BPF program. Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: David Vernet Link: https://lore.kernel.org/r/20221103191013.1236066-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9aababc5d78..86aad9b2ce02 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -288,6 +288,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; } +static inline bool btf_type_is_volatile(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE; +} + static inline bool btf_type_is_func(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; -- cgit From a35ec8e38cdd1766f29924ca391a01de20163931 Mon Sep 17 00:00:00 2001 From: "Hans J. Schultz" Date: Tue, 1 Nov 2022 21:39:21 +0200 Subject: bridge: Add MAC Authentication Bypass (MAB) support Hosts that support 802.1X authentication are able to authenticate themselves by exchanging EAPOL frames with an authenticator (Ethernet bridge, in this case) and an authentication server. Access to the network is only granted by the authenticator to successfully authenticated hosts. The above is implemented in the bridge using the "locked" bridge port option. When enabled, link-local frames (e.g., EAPOL) can be locally received by the bridge, but all other frames are dropped unless the host is authenticated. That is, unless the user space control plane installed an FDB entry according to which the source address of the frame is located behind the locked ingress port. The entry can be dynamic, in which case learning needs to be enabled so that the entry will be refreshed by incoming traffic. There are deployments in which not all the devices connected to the authenticator (the bridge) support 802.1X. Such devices can include printers and cameras. One option to support such deployments is to unlock the bridge ports connecting these devices, but a slightly more secure option is to use MAB. When MAB is enabled, the MAC address of the connected device is used as the user name and password for the authentication. 
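As an aside, the authentication step described in the rest of this message, with user space replacing the locked entry, is what iproute2's "bridge fdb replace <mac> dev <port> master dynamic" does. A hedged raw-netlink sketch of the same operation, with hypothetical ifindex/MAC and no error handling:

#include <linux/if_ether.h>
#include <linux/neighbour.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct ndmsg ndm;
		char attrs[64];		/* room for the NDA_LLADDR attribute */
	} req;
	struct rtattr *rta;
	unsigned char mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ndm));
	req.nlh.nlmsg_type = RTM_NEWNEIGH;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_ACK;
	req.ndm.ndm_family = AF_BRIDGE;
	req.ndm.ndm_ifindex = 5;		/* hypothetical bridge port */
	req.ndm.ndm_state = NUD_REACHABLE;	/* dynamic: ages like learned entries */
	req.ndm.ndm_flags = NTF_MASTER;		/* operate on the bridge FDB */

	/* append NDA_LLADDR carrying the now-authenticated MAC address */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = NDA_LLADDR;
	rta->rta_len = RTA_LENGTH(ETH_ALEN);
	memcpy(RTA_DATA(rta), mac, ETH_ALEN);
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	/* replacing the entry clears the "locked" flag, authenticating the device */
	sendto(fd, &req, req.nlh.nlmsg_len, 0,
	       (struct sockaddr *)&kernel, sizeof(kernel));
	close(fd);
	return 0;
}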
For MAB to work, the user space control plane needs to be notified about MAC addresses that are trying to gain access so that they will be compared against an allow list. This can be implemented via the regular learning process with the sole difference that learned FDB entries are installed with a new "locked" flag indicating that the entry cannot be used to authenticate the device. The flag cannot be set by user space, but user space can clear the flag by replacing the entry, thereby authenticating the device. Locked FDB entries implement the following semantics with regard to roaming, aging and forwarding: 1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports, in which case the "locked" flag is cleared. FDB entries cannot roam to locked ports regardless of whether MAB is enabled or not. Therefore, locked FDB entries are only created if an FDB entry with the given {MAC, VID} does not already exist. This behavior prevents unauthenticated devices from disrupting traffic destined to already authenticated devices. 2. Aging: Locked FDB entries age out and are refreshed by incoming traffic like regular entries. 3. Forwarding: Locked FDB entries forward traffic like regular entries. If user space detects an unauthorized MAC behind a locked port and wishes to prevent traffic with this MAC DA from reaching the host, it can do so using tc or a different mechanism. Enable the above behavior using a new bridge port option called "mab". It can only be enabled on a bridge port that is both locked and has learning enabled. Locked FDB entries are flushed from the port once MAB is disabled. A new option is added because there are pure 802.1X deployments that are not interested in notifications about locked FDB entries. Signed-off-by: Hans J. Schultz Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index d62ef428e3aa..1668ac4d7adc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -59,6 +59,7 @@ struct br_ip_list { #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) #define BR_PORT_LOCKED BIT(21) +#define BR_PORT_MAB BIT(22) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit From 02a68a47eadedf95748facfca6ced31fb0181d52 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:03 +0100 Subject: net: devlink: track netdev with devlink_port assigned Currently, ethernet drivers are using devlink_port_type_eth_set() and devlink_port_type_clear() to set devlink port type and link to related netdev. Instead of calling them directly, let the driver use the SET_NETDEV_DEVLINK_PORT macro to assign the devlink_port pointer and let devlink track it. Note that the devlink port pointer is static for as long as the netdevice is registered. In the devlink code, use a per-namespace netdev notifier to track the netdevices with devlink_port assigned and change the internal devlink_port type and related type pointer accordingly.
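An illustrative driver-side sketch of the new contract (the driver structure is hypothetical, not taken from any real driver): the assignment must happen before register_netdev(), while reg_state is still NETREG_UNINITIALIZED, which the macro's WARN_ON() enforces.

#include <linux/netdevice.h>
#include <net/devlink.h>

struct foo_priv {			/* hypothetical driver private data */
	struct devlink_port dl_port;
};

static int foo_register_netdev(struct net_device *netdev,
			       struct foo_priv *priv)
{
	/* assign the devlink_port while the netdev is still unregistered;
	 * devlink's netdev notifier then tracks type changes by itself */
	SET_NETDEV_DEVLINK_PORT(netdev, &priv->dl_port);

	return register_netdev(netdev);
}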
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4b5052db978f..f048a30ea10b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1999,6 +1999,11 @@ enum netdev_ml_priv_type { * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * + * @devlink_port: Pointer to related devlink port structure. + * Assigned by a driver before netdev registration using + * SET_NETDEV_DEVLINK_PORT macro. This pointer is static + * during the time netdevice is registered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2349,9 +2354,22 @@ struct net_device { netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; + + struct devlink_port *devlink_port; }; #define to_net_dev(d) container_of(d, struct net_device, dev) +/* + * Driver should use this to assign devlink port instance to a netdevice + * before it registers the netdevice. Therefore devlink_port is static + * during the netdev lifetime after it is registered. + */ +#define SET_NETDEV_DEVLINK_PORT(dev, port) \ +({ \ + WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ + ((dev)->devlink_port = (port)); \ +}) + static inline bool netif_elide_gro(const struct net_device *dev) { if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) @@ -2785,6 +2803,7 @@ enum netdev_cmd { NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, + NETDEV_PRE_UNINIT, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, -- cgit From 77df1db80da384c565106321f5934967690da7dd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:10 +0100 Subject: net: remove unused ndo_get_devlink_port Remove ndo_get_devlink_port which is no longer used alongside with the implementations in drivers. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f048a30ea10b..d45713a06568 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1366,10 +1366,6 @@ struct netdev_net_notifier { * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. - * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); - * Get devlink port instance associated with a given netdev. - * Called with a reference on the netdevice and devlink locks only, - * rtnl_lock is not held. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. 
@@ -1600,7 +1596,6 @@ struct net_device_ops { struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); - struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); -- cgit From aa3496accc412b3d975e4ee5d06076d73394d8b5 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:55 +0530 Subject: bpf: Refactor kptr_off_tab into btf_record To prepare the BPF verifier to handle special fields in both map values and program allocated types coming from program BTF, we need to refactor the kptr_off_tab handling code into something more generic and reusable across both cases to avoid code duplication. Later patches also require passing this data to helpers at runtime, so that they can work on user defined types, initialize them, destruct them, etc. The main observation is that both map values and such allocated types point to a type in program BTF, hence they can be handled similarly. We can prepare a field metadata table for both cases and store them in struct bpf_map or struct btf depending on the use case. Hence, refactor the code into generic btf_record and btf_field member structs. The btf_record represents the fields of a specific btf_type in user BTF. The cnt indicates the number of special fields we successfully recognized, and field_mask is a bitmask of fields that were found, to enable quick determination of availability of a certain field. Subsequently, refactor the rest of the code to work with these generic types, remove assumptions about kptr and kptr_off_tab, rename variables to more meaningful names, etc. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 125 ++++++++++++++++++++++++++++++++++------------------ include/linux/btf.h | 3 +- 2 files changed, 82 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8d948bfcb984..5f2a42033a37 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -165,35 +165,41 @@ struct bpf_map_ops { }; enum { - /* Support at most 8 pointers in a BPF map value */ - BPF_MAP_VALUE_OFF_MAX = 8, - BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + + /* Support at most 8 pointers in a BTF type */ + BTF_FIELDS_MAX = 8, + BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX + 1 + /* for bpf_spin_lock */ 1, /* for bpf_timer */ }; -enum bpf_kptr_type { - BPF_KPTR_UNREF, - BPF_KPTR_REF, +enum btf_field_type { + BPF_KPTR_UNREF = (1 << 2), + BPF_KPTR_REF = (1 << 3), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, }; -struct bpf_map_value_off_desc { +struct btf_field_kptr { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; +}; + +struct btf_field { u32 offset; - enum bpf_kptr_type type; - struct { - struct btf *btf; - struct module *module; - btf_dtor_kfunc_t dtor; - u32 btf_id; - } kptr; + enum btf_field_type type; + union { + struct btf_field_kptr kptr; + }; }; -struct bpf_map_value_off { - u32 nr_off; - struct bpf_map_value_off_desc off[]; +struct btf_record { + u32 cnt; + u32 field_mask; + struct btf_field fields[]; }; -struct bpf_map_off_arr { +struct btf_field_offs { u32 cnt; u32 field_off[BPF_MAP_OFF_ARR_MAX]; u8 field_sz[BPF_MAP_OFF_ARR_MAX]; @@ -215,7 +221,7 @@ struct bpf_map { u64 map_extra; /* any per-map-type extra fields */ u32 
map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ - struct bpf_map_value_off *kptr_off_tab; + struct btf_record *record; int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; @@ -227,7 +233,7 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct bpf_map_off_arr *off_arr; + struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -251,6 +257,37 @@ struct bpf_map { bool frozen; /* write-once; write-protected by freeze_mutex */ }; +static inline u32 btf_field_type_size(enum btf_field_type type) +{ + switch (type) { + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return sizeof(u64); + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static inline u32 btf_field_type_align(enum btf_field_type type) +{ + switch (type) { + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return __alignof__(u64); + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) +{ + if (IS_ERR_OR_NULL(rec)) + return false; + return rec->field_mask & type; +} + static inline bool map_value_has_spin_lock(const struct bpf_map *map) { return map->spin_lock_off >= 0; @@ -261,23 +298,19 @@ static inline bool map_value_has_timer(const struct bpf_map *map) return map->timer_off >= 0; } -static inline bool map_value_has_kptrs(const struct bpf_map *map) -{ - return !IS_ERR_OR_NULL(map->kptr_off_tab); -} - static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); - if (unlikely(map_value_has_kptrs(map))) { - struct bpf_map_value_off *tab = map->kptr_off_tab; + if (!IS_ERR_OR_NULL(map->record)) { + struct btf_field *fields = map->record->fields; + u32 cnt = map->record->cnt; int i; - for (i = 0; i < tab->nr_off; i++) - *(u64 *)(dst + tab->off[i].offset) = 0; + for (i = 0; i < cnt; i++) + memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type)); } } @@ -303,7 +336,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { + if (likely(!map->field_offs)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); else @@ -311,11 +344,12 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < map->field_offs->cnt; i++) { + u32 next_off = map->field_offs->field_off[i]; + u32 sz = next_off - curr_off; - memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + memcpy(dst + curr_off, src + curr_off, sz); + curr_off += map->field_offs->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -335,16 +369,17 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { + if (likely(!map->field_offs)) { memset(dst, 0, map->value_size); return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < map->field_offs->cnt; i++) { + u32 next_off = map->field_offs->field_off[i]; + u32 sz = next_off - curr_off; - memset(dst + curr_off, 0, next_off 
- curr_off); - curr_off += map->off_arr->field_sz[i]; + memset(dst + curr_off, 0, sz); + curr_off += map->field_offs->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } @@ -1699,11 +1734,13 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); -struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); -void bpf_map_free_kptr_off_tab(struct bpf_map *map); -struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); -bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); -void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); +struct btf_field *btf_record_find(const struct btf_record *rec, + u32 offset, enum btf_field_type type); +void btf_record_free(struct btf_record *rec); +void bpf_map_free_record(struct bpf_map *map); +struct btf_record *btf_record_dup(const struct btf_record *rec); +bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); +void bpf_obj_free_fields(const struct btf_record *rec, void *obj); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); diff --git a/include/linux/btf.h b/include/linux/btf.h index 86aad9b2ce02..9e62717cdc7a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -163,8 +163,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); -struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, - const struct btf_type *t); +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, -- cgit From db559117828d2448fe81ada051c60bcf39f822e9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:56 +0530 Subject: bpf: Consolidate spin_lock, timer management into btf_record Now that kptr_off_tab has been refactored into btf_record, and can hold more than one specific field type, accommodate bpf_spin_lock and bpf_timer as well. While they don't require any more metadata than offset, having all special fields in one place allows us to share the same code for allocated user defined types and handle both map values and these allocated objects in a similar fashion. As an optimization, we still keep spin_lock_off and timer_off offsets in the btf_record structure, just to avoid having to find the btf_field struct each time their offset is needed. This is mostly needed to manipulate such objects in a map value at runtime. It's ok to hardcode just one offset as more than one field is disallowed.
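To make the final shape concrete, a small user-space model of the idea (illustrative only; field values and the 8-entry bound are made up, and the real kernel structs carry more state): one btf_record-like table describes all special fields of a type, and the field_mask bitmask answers "does this type contain field X" in O(1), mirroring btf_record_has_field().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum field_type {			/* mirrors the BPF_* field flags */
	FLD_SPIN_LOCK = 1 << 0,
	FLD_TIMER     = 1 << 1,
	FLD_KPTR_REF  = 1 << 3,
};

struct field {
	uint32_t offset;		/* byte offset inside the value */
	enum field_type type;
};

struct record {
	uint32_t cnt;			/* number of recognized fields */
	uint32_t field_mask;		/* OR of all field types present */
	struct field fields[8];
};

static bool record_has_field(const struct record *rec, enum field_type type)
{
	return rec->field_mask & type;	/* O(1), no table walk */
}

int main(void)
{
	struct record rec = {
		.cnt = 2,
		.field_mask = FLD_SPIN_LOCK | FLD_TIMER,
		.fields = { { 0, FLD_SPIN_LOCK }, { 8, FLD_TIMER } },
	};

	printf("timer: %d, kptr: %d\n",
	       record_has_field(&rec, FLD_TIMER),
	       record_has_field(&rec, FLD_KPTR_REF));
	return 0;
}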
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221103191013.1236066-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 53 ++++++++++++++++++++++++++++++++--------------------- include/linux/btf.h | 3 ++- 2 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5f2a42033a37..aae85019abde 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -166,13 +166,13 @@ struct bpf_map_ops { enum { /* Support at most 8 pointers in a BTF type */ - BTF_FIELDS_MAX = 8, - BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX + - 1 + /* for bpf_spin_lock */ - 1, /* for bpf_timer */ + BTF_FIELDS_MAX = 10, + BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX, }; enum btf_field_type { + BPF_SPIN_LOCK = (1 << 0), + BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, @@ -196,6 +196,8 @@ struct btf_field { struct btf_record { u32 cnt; u32 field_mask; + int spin_lock_off; + int timer_off; struct btf_field fields[]; }; @@ -220,10 +222,8 @@ struct bpf_map { u32 max_entries; u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; - int spin_lock_off; /* >=0 valid offset, <0 error */ - struct btf_record *record; - int timer_off; /* >=0 valid offset, <0 error */ u32 id; + struct btf_record *record; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; @@ -257,9 +257,29 @@ struct bpf_map { bool frozen; /* write-once; write-protected by freeze_mutex */ }; +static inline const char *btf_field_type_name(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + return "bpf_spin_lock"; + case BPF_TIMER: + return "bpf_timer"; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return "kptr"; + default: + WARN_ON_ONCE(1); + return "unknown"; + } +} + static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { + case BPF_SPIN_LOCK: + return sizeof(struct bpf_spin_lock); + case BPF_TIMER: + return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: return sizeof(u64); @@ -272,6 +292,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type) static inline u32 btf_field_type_align(enum btf_field_type type) { switch (type) { + case BPF_SPIN_LOCK: + return __alignof__(struct bpf_spin_lock); + case BPF_TIMER: + return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: return __alignof__(u64); @@ -288,22 +312,8 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline bool map_value_has_spin_lock(const struct bpf_map *map) -{ - return map->spin_lock_off >= 0; -} - -static inline bool map_value_has_timer(const struct bpf_map *map) -{ - return map->timer_off >= 0; -} - static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - if (unlikely(map_value_has_spin_lock(map))) - memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); - if (unlikely(map_value_has_timer(map))) - memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); if (!IS_ERR_OR_NULL(map->record)) { struct btf_field *fields = map->record->fields; u32 cnt = map->record->cnt; @@ -1740,6 +1750,7 @@ void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); +void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void 
bpf_obj_free_fields(const struct btf_record *rec, void *obj); struct bpf_map *bpf_map_get(u32 ufd); diff --git a/include/linux/btf.h b/include/linux/btf.h index 9e62717cdc7a..282006abd062 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -163,7 +163,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); -struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t); +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, + u32 field_mask, u32 value_size); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, -- cgit From f71b2f64177a199d5b1d2047e155d45fd98f564a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:57 +0530 Subject: bpf: Refactor map->off_arr handling Refactor map->off_arr handling into generic functions that can work on their own without hardcoding map specific code. The btf_field_offs structure is now returned from btf_parse_field_offs, which can be reused later for types in program BTF. All functions like copy_map_value, zero_map_value call generic underlying functions so that they can also be reused later for copying to values allocated in programs which encode specific fields. Later, some helper functions will also require access to this btf_field_offs structure to be able to skip over special fields at runtime. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221103191013.1236066-9-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 41 ++++++++++++++++++----------------------- include/linux/btf.h | 1 + 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aae85019abde..798aec816970 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -341,57 +341,64 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each.
*/ -static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) +static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, + void *dst, void *src, u32 size, + bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!map->field_offs)) { + if (likely(!foffs)) { if (long_memcpy) - bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + bpf_long_memcpy(dst, src, round_up(size, 8)); else - memcpy(dst, src, map->value_size); + memcpy(dst, src, size); return; } - for (i = 0; i < map->field_offs->cnt; i++) { - u32 next_off = map->field_offs->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += map->field_offs->field_sz[i]; + curr_off += foffs->field_sz[i]; } - memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); + memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, false); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, true); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); } -static inline void zero_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!map->field_offs)) { - memset(dst, 0, map->value_size); + if (likely(!foffs)) { + memset(dst, 0, size); return; } - for (i = 0; i < map->field_offs->cnt; i++) { - u32 next_off = map->field_offs->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += map->field_offs->field_sz[i]; + curr_off += foffs->field_sz[i]; } - memset(dst + curr_off, 0, map->value_size - curr_off); + memset(dst + curr_off, 0, size - curr_off); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_memzero(map->field_offs, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, diff --git a/include/linux/btf.h b/include/linux/btf.h index 282006abd062..d80345fa566b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -165,6 +165,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); +struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, -- cgit From 2b6c0e152868c9c5939a5c5094d5b2be61cf48e6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 28 Oct 2022 11:23:32 +0200 Subject: bcma: Use the proper gpio include The <linux/bcma/bcma_driver_chipcommon.h> header includes the legacy header <linux/gpio.h> to obtain struct gpio_chip. Instead, include <linux/gpio/driver.h>, where this struct is defined. It turns out that the brcm80211 brcmsmac driver depends on this to bring in the symbol gpio_is_valid().
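A stand-alone illustration of the point completed in the next sentences (stand-in types, not the real bcma structures): a pointer formed by taking the address of an embedded struct member can never be NULL, so a validity check on it is dead code.

#include <stdio.h>

struct gpio_chip_stub { int base; };	/* stand-in for struct gpio_chip */
struct chipcommon_stub { struct gpio_chip_stub gpio; };

int main(void)
{
	struct chipcommon_stub cc;
	/* address of an embedded member: cannot be NULL */
	struct gpio_chip_stub *bcma_gpio = &cc.gpio;

	printf("%d\n", bcma_gpio != NULL);	/* always prints 1 */
	return 0;
}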
The driver looks up the BCMA parent GPIO driver and checks that this succeeds, but then it goes on to use the deprecated GPIO call gpio_is_valid() to check the consistency of the .base member of the BCMA GPIO struct. The whole check can be dropped because bcma_gpio is initialized in the declarations: struct gpio_chip *bcma_gpio = &cc_drv->gpio; And this can never be NULL. Cc: Jonas Gorski Acked-by: Arend van Spriel Signed-off-by: Linus Walleij Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221028092332.238728-1-linus.walleij@linaro.org --- include/linux/bcma/bcma_driver_chipcommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 2d94c30ed439..0cb6638b55e5 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -4,7 +4,7 @@ #include <linux/platform_device.h> #include <linux/platform_data/brcmfmac.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> /** ChipCommon core registers. **/ #define BCMA_CC_ID 0x0000 -- cgit From d08cb25556773c65efb21761b75f92c804ad0975 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 28 Oct 2022 10:01:01 +0800 Subject: net: mv643xx_eth: support MII/GMII/RGMII modes for Kirkwood Support mode switching properly, which was not available before. If the SoC has two Ethernet controllers, by setting both of them into MII mode, the first controller enters GMII mode, while the second controller is effectively disabled. This requires configuring (and maybe enabling) the second controller in the device tree, even though it cannot be used. Signed-off-by: David Yang Signed-off-by: David S. Miller --- include/linux/mv643xx_eth.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 3682ae75c7aa..145169be2ed8 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -8,6 +8,7 @@ #include <linux/mbus.h> #include <linux/if_ether.h> +#include <linux/phy.h> #define MV643XX_ETH_SHARED_NAME "mv643xx_eth" #define MV643XX_ETH_NAME "mv643xx_eth_port" @@ -59,6 +60,7 @@ struct mv643xx_eth_platform_data { */ int speed; int duplex; + phy_interface_t interface; /* * How many RX/TX queues to use. -- cgit From e1f4ecab19338ec7079830e8700e4869f991fd45 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 4 Nov 2022 17:13:01 +0000 Subject: net: remove explicit phylink_generic_validate() references Virtually all conventional network drivers are now converted to use phylink_generic_validate() - only DSA drivers and fman_memac remain, so let's remove the necessity for network drivers to explicitly set this member, and default to phylink_generic_validate() when unset. This is possible as .validate must currently be set. Any remaining instances that have not been addressed by this patch can be fixed up later. Signed-off-by: Russell King (Oracle) Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/E1or0FZ-001tRa-DI@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1df3e5e316e8..c492c26202b5 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -207,6 +207,11 @@ struct phylink_mac_ops { * * If the @state->interface mode is not supported, then the @supported * mask must be cleared.
+ * + * This member is optional; if not set, the generic validator will be + * used making use of @config->mac_capabilities and + * @config->supported_interfaces to determine which link modes are + * supported. */ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); -- cgit From 9a0f830f80265bd1ef816e1541ac24bee80e9a3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Nov 2022 12:01:25 -0700 Subject: ethtool: linkstate: add a statistic for PHY down events The previous attempt to augment carrier_down (see Link) was not met with much enthusiasm so let's do the simple thing of exposing what some devices already maintain. Add a common ethtool statistic for link going down. Currently users have to maintain per-driver mapping to extract the right stat from the vendor-specific ethtool -S stats. carrier_down does not fit the bill because it counts a lot of software related false positives. Add the statistic to the extended link state API to steer vendors towards implementing all of it. Implement for bnxt and all Linux-controlled PHYs. mlx5 and (possibly) enic also have a counter for this but I leave the implementation to their maintainers. Link: https://lore.kernel.org/r/20220520004500.2250674-1-kuba@kernel.org Reviewed-by: Florian Fainelli Reviewed-by: Michael Chan Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20221104190125.684910-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 17 +++++++++++++++++ include/linux/phy.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 99dc7bfbcd3c..5c51c7fda32a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -125,6 +125,20 @@ struct ethtool_link_ext_state_info { }; }; +struct ethtool_link_ext_stats { + /* Custom Linux statistic for PHY level link down events. + * In a simpler world it should be equal to netdev->carrier_down_count + * unfortunately netdev also counts local reconfigurations which don't + * actually take the physical link down, not to mention NC-SI which, + * if present, keeps the link up regardless of host state. + * This statistic counts when PHY _actually_ went down, or lost link. + * + * Note that we need u64 for ethtool_stats_init() and comparisons + * to ETHTOOL_STAT_NOT_SET, but only u32 is exposed to the user. + */ + u64 link_down_events; +}; + /** * ethtool_rxfh_indir_default - get default value for RX flow hash indirection * @index: Index in RX flow hash indirection table @@ -481,6 +495,7 @@ struct ethtool_module_power_mode_params { * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_link_ext_stats: Read extra link-related counters. * @get_eeprom_len: Read range of EEPROM addresses for validation of * @get_eeprom and @set_eeprom requests. * Returns 0 if device does not support EEPROM access. 
@@ -652,6 +667,8 @@ struct ethtool_ops { u32 (*get_link)(struct net_device *); int (*get_link_ext_state)(struct net_device *, struct ethtool_link_ext_state_info *); + void (*get_link_ext_stats)(struct net_device *dev, + struct ethtool_link_ext_stats *stats); int (*get_eeprom_len)(struct net_device *); int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); diff --git a/include/linux/phy.h b/include/linux/phy.h index ddf66198f751..9a3752c0c444 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -600,6 +600,7 @@ struct macsec_ops; * @psec: Pointer to Power Sourcing Equipment control struct * @lock: Mutex for serialization access to PHY * @state_queue: Work queue for state machine + * @link_down_events: Number of times link was lost * @shared: Pointer to private data shared by phys in one package * @priv: Pointer to driver private data * @@ -723,6 +724,8 @@ struct phy_device { int pma_extable; + unsigned int link_down_events; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); -- cgit From c3d96f690a790074b508fe183a41e36a00cd7ddd Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 3 Oct 2022 07:34:21 +0100 Subject: net, proc: Provide PROC_FS=n fallback for proc_create_net_single_write() Provide a CONFIG_PROC_FS=n fallback for proc_create_net_single_write(). Also provide a fallback for proc_create_net_data_write(). Fixes: 564def71765c ("proc: Add a way to make network proc files writable") Reported-by: kernel test robot Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 81d6e4ec2294..0260f5ea98fe 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,8 +208,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {} static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } #define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;}) +#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;}) #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;}) #define proc_create_net_single(name, mode, parent, show, data) ({NULL;}) +#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;}) static inline struct pid *tgid_pidfd_to_pid(const struct file *file) { -- cgit From 42fb06b391ace2aec5cdb1ebb8ff668f0a34332f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Oct 2022 08:49:29 +0100 Subject: net: Change the udp encap_err_rcv to allow use of {ip,ipv6}_icmp_error() Change the udp encap_err_rcv signature to match ip_icmp_error() and ipv6_icmp_error() so that those can be used from the called function and export them. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/linux/udp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 5cdba00a904a..dea57aa37df6 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,7 +70,8 @@ struct udp_sock { * For encapsulation sockets. 
*/ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); - void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); -- cgit From bd039b5ea2a91ea707ee8539df26456bd5be80af Mon Sep 17 00:00:00 2001 From: Andy Ren Date: Mon, 7 Nov 2022 09:42:42 -0800 Subject: net/core: Allow live renaming when an interface is up Allow a network interface to be renamed when the interface is up. As described in the netconsole documentation [1], when netconsole is used as a built-in, it will bring up the specified interface as soon as possible. As a result, user space will not be able to rename the interface since the kernel disallows renaming of interfaces that are administratively up unless the 'IFF_LIVE_RENAME_OK' private flag was set by the kernel. The original solution [2] to this problem was to add a new parameter to the netconsole configuration parameters that allows renaming of the interface used by netconsole while it is administratively up. However, during the discussion that followed, it became apparent that we have no reason to keep the current restriction and instead we should allow user space to rename interfaces regardless of their administrative state: 1. The restriction was put in place over 20 years ago when renaming was only possible via IOCTL and before rtnetlink started notifying user space about such changes like it does today. 2. The 'IFF_LIVE_RENAME_OK' flag was added over 3 years ago in version 5.2 and no regressions were reported. 3. In-kernel listeners to 'NETDEV_CHANGENAME' do not seem to care about the administrative state of interface. Therefore, allow user space to rename running interfaces by removing the restriction and the associated 'IFF_LIVE_RENAME_OK' flag. Help in possible triage by emitting a message to the kernel log that an interface was renamed while UP. [1] https://www.kernel.org/doc/Documentation/networking/netconsole.rst [2] https://lore.kernel.org/netdev/20221102002420.2613004-1-andy.ren@getcruise.com/ Signed-off-by: Andy Ren Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d45713a06568..4be87b89e481 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1650,7 +1650,6 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device - * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN @@ -1686,7 +1685,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, - IFF_LIVE_RENAME_OK = 1<<30, + /* was IFF_LIVE_RENAME_OK */ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; @@ -1721,7 +1720,6 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /* Specifies the type of the struct net_device::ml_priv pointer */ -- cgit From 3e52fba03a20234abc65a656cef063a1045d9723 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 8 Nov 2022 14:22:06 +0100 Subject: net: introduce a helper to move notifier block to different namespace Currently, the per-netdev variant of the per-net notifier follows its netdev from namespace to namespace. This is implemented by the move_netdevice_notifiers_dev_net() helper. Devlink needs to re-register its per-net notifier during devlink reload. Introduce a new helper called move_netdevice_notifier_net() and share the unregister/register code with the existing move_netdevice_notifiers_dev_net() helper.
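An illustrative caller-side sketch (hypothetical driver context; only move_netdevice_notifier_net() itself comes from this patch):

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	/* handle netdev events in whichever namespace we are registered */
	return NOTIFY_DONE;
}

static struct notifier_block foo_nb = { .notifier_call = foo_netdev_event };

static void foo_reload_netns(struct net *src_net, struct net *dst_net)
{
	/* instead of unregister_netdevice_notifier_net(src_net, &foo_nb)
	 * followed by register_netdevice_notifier_net(dst_net, &foo_nb),
	 * move the block in one step: */
	move_netdevice_notifier_net(src_net, dst_net, &foo_nb);
}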
Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4be87b89e481..02a2318da7c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2826,6 +2826,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, + struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); -- cgit From f6479ea4e5990e17b9690d66474198dcdba1b2c1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 8 Nov 2022 14:25:56 +0000 Subject: net: mdio: add mdiodev_c45_(read|write) Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 00177567cfef..f7fbbf3069e7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -488,6 +488,19 @@ static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); } +static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad, + u16 regnum) +{ + return mdiobus_c45_read(mdiodev->bus, mdiodev->addr, devad, regnum); +} + +static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad, + u16 regnum, u16 val) +{ + return mdiobus_c45_write(mdiodev->bus, mdiodev->addr, devad, regnum, + val); +} + int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); -- cgit From cc514101a97e3fb48f8617c8f291db798d10d831 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Sat, 5 Nov 2022 23:36:18 +0100 Subject: net: ethernet: mtk_wed: introduce wed mcu support Introduce WED mcu support used to configure WED WO chip. This is a preliminary patch in order to add RX Wireless Ethernet Dispatch available on MT7986 SoC. Tested-by: Daniel Golle Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: David S. 
Miller --- include/linux/soc/mediatek/mtk_wed.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 4450c8b7a1cb..2cc2f1e43ba9 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -11,6 +11,35 @@ struct mtk_wed_hw; struct mtk_wdma_desc; +enum mtk_wed_wo_cmd { + MTK_WED_WO_CMD_WED_CFG, + MTK_WED_WO_CMD_WED_RX_STAT, + MTK_WED_WO_CMD_RRO_SER, + MTK_WED_WO_CMD_DBG_INFO, + MTK_WED_WO_CMD_DEV_INFO, + MTK_WED_WO_CMD_BSS_INFO, + MTK_WED_WO_CMD_STA_REC, + MTK_WED_WO_CMD_DEV_INFO_DUMP, + MTK_WED_WO_CMD_BSS_INFO_DUMP, + MTK_WED_WO_CMD_STA_REC_DUMP, + MTK_WED_WO_CMD_BA_INFO_DUMP, + MTK_WED_WO_CMD_FBCMD_Q_DUMP, + MTK_WED_WO_CMD_FW_LOG_CTRL, + MTK_WED_WO_CMD_LOG_FLUSH, + MTK_WED_WO_CMD_CHANGE_STATE, + MTK_WED_WO_CMD_CPU_STATS_ENABLE, + MTK_WED_WO_CMD_CPU_STATS_DUMP, + MTK_WED_WO_CMD_EXCEPTION_INIT, + MTK_WED_WO_CMD_PROF_CTRL, + MTK_WED_WO_CMD_STA_BA_DUMP, + MTK_WED_WO_CMD_BA_CTRL_DUMP, + MTK_WED_WO_CMD_RXCNT_CTRL, + MTK_WED_WO_CMD_RXCNT_INFO, + MTK_WED_WO_CMD_SET_CAP, + MTK_WED_WO_CMD_CCIF_RING_DUMP, + MTK_WED_WO_CMD_WED_END +}; + enum mtk_wed_bus_tye { MTK_WED_BUS_PCIE, MTK_WED_BUS_AXI, -- cgit From 084d60ce0c6cef96024d53a58b92d7ff2d8b9318 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Nov 2022 23:36:20 +0100 Subject: net: ethernet: mtk_wed: rename tx_wdma array in rx_wdma Rename tx_wdma queue array in rx_wdma since this is rx side of wdma soc. Moreover rename mtk_wed_wdma_ring_setup routine in mtk_wed_wdma_rx_ring_setup() Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- include/linux/soc/mediatek/mtk_wed.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 2cc2f1e43ba9..956978320f8b 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -7,6 +7,7 @@ #include #define MTK_WED_TX_QUEUES 2 +#define MTK_WED_RX_QUEUES 2 struct mtk_wed_hw; struct mtk_wdma_desc; @@ -66,7 +67,7 @@ struct mtk_wed_device { struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; struct mtk_wed_ring txfree_ring; - struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; struct { int size; -- cgit From 4c5de09eb0d05fc9e73ff8f0e052622f1f3df6d8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Nov 2022 23:36:21 +0100 Subject: net: ethernet: mtk_wed: add configure wed wo support Enable RX Wireless Ethernet Dispatch available on MT7986 Soc. Tested-by: Daniel Golle Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. 
Miller --- include/linux/soc/mediatek/mtk_wed.h | 76 +++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 956978320f8b..8294978f4bca 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -5,10 +5,13 @@ #include #include #include +#include #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define WED_WO_STA_REC 0x6 + struct mtk_wed_hw; struct mtk_wdma_desc; @@ -41,21 +44,37 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; +struct mtk_rxbm_desc { + __le32 buf0; + __le32 token; +} __packed __aligned(4); + enum mtk_wed_bus_tye { MTK_WED_BUS_PCIE, MTK_WED_BUS_AXI, }; +#define MTK_WED_RING_CONFIGURED BIT(0) struct mtk_wed_ring { struct mtk_wdma_desc *desc; dma_addr_t desc_phys; u32 desc_size; int size; + u32 flags; u32 reg_base; void __iomem *wpdma; }; +struct mtk_wed_wo_rx_stats { + __le16 wlan_idx; + __le16 tid; + __le32 rx_pkt_cnt; + __le32 rx_byte_cnt; + __le32 rx_err_cnt; + __le32 rx_drop_cnt; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -64,9 +83,12 @@ struct mtk_wed_device { bool init_done, running; int wdma_idx; int irq; + u8 version; struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES]; struct mtk_wed_ring txfree_ring; + struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; struct { @@ -74,7 +96,20 @@ struct mtk_wed_device { void **pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; - } buf_ring; + } tx_buf_ring; + + struct { + int size; + struct page_frag_cache rx_page; + struct mtk_rxbm_desc *desc; + dma_addr_t desc_phys; + } rx_buf_ring; + + struct { + struct mtk_wed_ring ring; + dma_addr_t miod_phys; + dma_addr_t fdbk_phys; + } rro; /* filled by driver: */ struct { @@ -83,22 +118,36 @@ struct mtk_wed_device { struct pci_dev *pci_dev; }; enum mtk_wed_bus_tye bus_type; + void __iomem *base; + u32 phy_base; u32 wpdma_phys; u32 wpdma_int; u32 wpdma_mask; u32 wpdma_tx; u32 wpdma_txfree; + u32 wpdma_rx_glo; + u32 wpdma_rx; + + bool wcid_512; u16 token_start; unsigned int nbuf; + unsigned int rx_nbuf; + unsigned int rx_npkt; + unsigned int rx_size; u8 tx_tbit[MTK_WED_TX_QUEUES]; + u8 rx_tbit[MTK_WED_RX_QUEUES]; u8 txfree_tbit; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); + u32 (*init_rx_buf)(struct mtk_wed_device *wed, int size); + void (*release_rx_buf)(struct mtk_wed_device *wed); + void (*update_wo_rx_stats)(struct mtk_wed_device *wed, + struct mtk_wed_wo_rx_stats *stats); } wlan; #endif }; @@ -107,9 +156,15 @@ struct mtk_wed_ops { int (*attach)(struct mtk_wed_device *dev); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs); + int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); int (*txfree_ring_setup)(struct mtk_wed_device *dev, void __iomem *regs); + int (*msg_update)(struct mtk_wed_device *dev, int cmd_id, + void *data, int len); void (*detach)(struct mtk_wed_device *dev); + void (*ppe_check)(struct mtk_wed_device *dev, struct sk_buff *skb, + u32 reason, u32 hash); void (*stop)(struct mtk_wed_device *dev); void (*start)(struct mtk_wed_device *dev, u32 irq_mask); @@ -144,6 +199,16 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } +static inline bool 
+mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version != 1; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -160,6 +225,12 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) (_dev)->ops->irq_get(_dev, _mask) #define mtk_wed_device_irq_set_mask(_dev, _mask) \ (_dev)->ops->irq_set_mask(_dev, _mask) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \ + (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ + (_dev)->ops->msg_update(_dev, _id, _msg, _len) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -173,6 +244,9 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) #define mtk_wed_device_irq_get(_dev, _mask) 0 #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV #endif #endif -- cgit From 75ab70ec5cefade915a999886e4863fe6a38f8b5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 9 Nov 2022 15:09:45 -0800 Subject: ptp: remove the .adjfreq interface function Now that all drivers have been converted to .adjfine, we can remove the .adjfreq from the interface structure. Signed-off-by: Jacob Keller Cc: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index f4781c5766d6..fdffa6a98d79 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -77,12 +77,6 @@ struct ptp_system_timestamp { * nominal frequency in parts per million, but with a * 16 bit binary fractional field. * - * @adjfreq: Adjusts the frequency of the hardware clock. - * This method is deprecated. New drivers should implement - * the @adjfine method instead. - * parameter delta: Desired frequency offset from nominal frequency - * in parts per billion - * * @adjphase: Adjusts the phase offset of the hardware clock. * parameter delta: Desired change in nanoseconds. * @@ -174,7 +168,6 @@ struct ptp_clock_info { int pps; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); - int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); -- cgit From 354259fa73e2aac92ae5e19522adb69a92c15b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2022 09:57:58 +0000 Subject: net: remove skb->vlan_present skb->vlan_present seems redundant. We can instead derive it from this boolean expression: vlan_present = skb->vlan_proto != 0 || skb->vlan_tci != 0 Add a new union, to access both fields in a single load/store when possible. union { u32 vlan_all; struct { __be16 vlan_proto; __u16 vlan_tci; }; }; This allows following patch to remove a conditional test in GRO stack. 
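For illustration, the win from the union can be modeled in plain C as follows (a minimal user-space sketch, not the kernel code itself; kernel field types and endianness details are simplified):

  #include <stdint.h>

  union vlan_fields {
          uint32_t vlan_all;
          struct {
                  uint16_t vlan_proto;    /* __be16 in the kernel */
                  uint16_t vlan_tci;
          };
  };

  /* proto != 0 || tci != 0 collapses into one 32-bit test */
  static int tag_present(const union vlan_fields *v)
  {
          return v->vlan_all != 0;
  }

Copying the tag likewise becomes a single 32-bit store (dst->vlan_all = src->vlan_all), which is exactly what the reworked __vlan_hwaccel_copy_tag() in the hunk below does.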
Note: We move remcsum_offload to keep TC_AT_INGRESS_MASK and SKB_MONO_DELIVERY_TIME_MASK unchanged. Signed-off-by: Eric Dumazet Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Signed-off-by: Jakub Kicinski --- include/linux/if_vlan.h | 9 +++------ include/linux/skbuff.h | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e00c4ee81ff7..6864b89ef868 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -76,7 +76,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) +#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) @@ -471,7 +471,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { - skb->vlan_present = 0; + skb->vlan_all = 0; } /** @@ -483,9 +483,7 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { - dst->vlan_present = src->vlan_present; - dst->vlan_proto = src->vlan_proto; - dst->vlan_tci = src->vlan_tci; + dst->vlan_all = src->vlan_all; } /* @@ -519,7 +517,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, { skb->vlan_proto = vlan_proto; skb->vlan_tci = vlan_tci; - skb->vlan_present = 1; } /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 59c9fd55699d..4e464a27adaf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -818,7 +818,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff - * @vlan_present: VLAN tag is present + * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -951,7 +951,7 @@ struct sk_buff { /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ + __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; @@ -966,7 +966,6 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; - __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; @@ -999,8 +998,13 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; - __be16 vlan_proto; - __u16 vlan_tci; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; @@ -1059,15 +1063,13 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time +/* if you move tc_at_ingress or mono_delivery_time * around, you also must adapt these constants. 
*/ #ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 #define TC_AT_INGRESS_MASK (1 << 0) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else -#define PKT_VLAN_PRESENT_BIT 0 #define TC_AT_INGRESS_MASK (1 << 7) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif -- cgit From 2c925db0a7d69b404d6bfe4c037935c2d367913d Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Tue, 9 Feb 2021 17:48:11 +0200 Subject: net/mlx5e: Support enhanced CQE compression The CQE compression feature improves performance by reducing the PCI bandwidth bottleneck on CQE writes. Enhanced CQE compression, introduced in ConnectX-6, aims to reduce the CPU utilization of SW-side packet decompression: the ownership bit, whose rewrite is likely to cost a cache miss, is replaced by a validity byte handled solely by HW. Another advantage of the enhanced feature is that session packets are available to SW as soon as a single CQE slot is filled, instead of waiting for the session to close; this improves packet latency from NIC to host. Performance: Following are the tested scenarios and results comparing basic and enhanced CQE compression.

Setup: IXIA 100GbE connected directly to port 0 and port 1 of a ConnectX-6 Dx 100GbE dual port.

Case #1, RX only, single flow goes to single queue: IRQ rate reduced by ~30%, CPU utilization improved by 2%.

Case #2, IP forwarding from port 1 to port 0, single flow goes to single queue: Avg latency improved from 60us to 21us, frame loss improved from 0.5% to 0.0%.

Case #3, IP forwarding from port 1 to port 0, max throughput, IXIA sends 100%, 8192 UDP flows, goes to 24 queues: Enhanced is equal or slightly better than basic.

Testing the basic compression feature with this patch shows no performance degradation of the basic compression feature. Signed-off-by: Ofer Levi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1ff91cb79ded..eb3fac30488b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -882,6 +882,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } +static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) +{ + /* num_of_mini_cqes is zero based */ + return get_cqe_opcode(cqe) + 1; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; -- cgit From 92125c3a602454824d70edb1b2abb382811cab4f Mon Sep 17 00:00:00 2001 From: Nick Child Date: Thu, 10 Nov 2022 15:32:17 -0600 Subject: ibmvnic: Add hotpluggable CPU callbacks to reassign affinity hints When CPUs are added and removed, ibmvnic devices will reassign hint values. Introduce a new cpu hotplug state CPUHP_IBMVNIC_DEAD to signal to ibmvnic devices that the CPU has been removed and it is time to reset affinity hint assignments. On the other hand, when CPUs are being added, add a state instance to CPUHP_AP_ONLINE_DYN which will trigger a reassignment of affinity hints once the new CPUs are online. This implementation is based on the virtio_net driver. Signed-off-by: Thomas Falcon Signed-off-by: Dany Madden Signed-off-by: Nick Child Reviewed-by: Rick Lindsley Reviewed-by: Haren Myneni Signed-off-by: David S.
Miller --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f61447913db9..c8bc85a87b1e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -69,6 +69,7 @@ enum cpuhp_state { CPUHP_X86_APB_DEAD, CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, + CPUHP_IBMVNIC_DEAD, CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, -- cgit From 2d577252579b3efb9e934b68948a2edfa9920110 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:23 +0530 Subject: bpf: Remove BPF_MAP_OFF_ARR_MAX In f71b2f64177a ("bpf: Refactor map->off_arr handling"), map->off_arr was refactored to be btf_field_offs. The number of field offsets is equal to the maximum number of possible fields, limited by BTF_FIELDS_MAX. Hence, reuse BTF_FIELDS_MAX, as spin_lock and timer no longer need to be handled specially for offset sorting; fix the comment, and remove the incorrect WARN_ON, as rec->cnt can never exceed this value. The reason to keep a separate constant was that it was always 2 more than the total number of kptrs. This is no longer the case. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 798aec816970..1a66a1df1af1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -165,9 +165,8 @@ struct bpf_map_ops { }; enum { - /* Support at most 8 pointers in a BTF type */ - BTF_FIELDS_MAX = 10, - BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX, + /* Support at most 10 fields in a BTF type */ + BTF_FIELDS_MAX = 10, }; enum btf_field_type { @@ -203,8 +202,8 @@ struct btf_record { struct btf_field_offs { u32 cnt; - u32 field_off[BPF_MAP_OFF_ARR_MAX]; - u8 field_sz[BPF_MAP_OFF_ARR_MAX]; + u32 field_off[BTF_FIELDS_MAX]; + u8 field_sz[BTF_FIELDS_MAX]; }; struct bpf_map { -- cgit From e5feed0f64f73e167ef70755d3dc2db959d8fd5c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:24 +0530 Subject: bpf: Fix copy_map_value, zero_map_value The current offset needs to also skip over the already copied region in addition to the size of the next field. This case manifests where there are gaps between adjacent special fields.
It was observed that for a map value with size 48, having fields at:

  off:  0, 16, 32
  size: 4, 16, 16

The current code does:

  memcpy(dst + 0, src + 0, 0)
  memcpy(dst + 4, src + 4, 12)
  memcpy(dst + 20, src + 20, 12)
  memcpy(dst + 36, src + 36, 12)

With the fix, it is done correctly as:

  memcpy(dst + 0, src + 0, 0)
  memcpy(dst + 4, src + 4, 12)
  memcpy(dst + 32, src + 32, 0)
  memcpy(dst + 48, src + 48, 0)

Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1a66a1df1af1..f08eb2d27de0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -360,7 +360,7 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += foffs->field_sz[i]; + curr_off += foffs->field_sz[i] + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } @@ -390,7 +390,7 @@ static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += foffs->field_sz[i]; + curr_off += foffs->field_sz[i] + sz; } memset(dst + curr_off, 0, size - curr_off); } -- cgit From f0c5941ff5b255413d31425bb327c2aec3625673 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:25 +0530 Subject: bpf: Support bpf_list_head in map values Add the support on the map side to parse, recognize, verify, and build a metadata table for a new special field of the type struct bpf_list_head. To parameterize the bpf_list_head for a certain value type and the list_node member it will accept in that value type, we use BTF declaration tags. The definition of bpf_list_head in a map value will be done as follows:

  struct foo {
          struct bpf_list_node node;
          int data;
  };

  struct map_value {
          struct bpf_list_head head __contains(foo, node);
  };

Then, the bpf_list_head only allows adding to the list 'head' using the bpf_list_node 'node' for the type struct foo. The 'contains' annotation is a BTF declaration tag of the form "contains:name:node", where the name is used to look up the type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during the lookup. The node part defines the name of the member in this type that has the type struct bpf_list_node, which is actually used for linking into the linked list. For now, the 'kind' part is hardcoded as struct. This allows building intrusive linked lists in BPF, using container_of to obtain a pointer to the entry, while being completely type safe from the perspective of the verifier. The verifier knows exactly the type of the nodes, and knows that list helpers return that type at some fixed offset where the bpf_list_node member used for this list exists. The verifier also uses this information to disallow adding types that are not accepted by a certain list. For now, no elements can be added to such lists. Support for that is coming in future patches, hence draining and freeing items is done with a TODO that will be resolved in a future patch. Note that the bpf_list_head_free function moves the list out to a local variable under the lock and releases it, doing the actual draining of the list items outside the lock.
While this helps with not holding the lock for too long pessimizing other concurrent list operations, it is also necessary for deadlock prevention: unless every function called in the critical section would be notrace, a fentry/fexit program could attach and call bpf_map_update_elem again on the map, leading to the same lock being acquired if the key matches and lead to a deadlock. While this requires some special effort on part of the BPF programmer to trigger and is highly unlikely to occur in practice, it is always better if we can avoid such a condition. While notrace would prevent this, doing the draining outside the lock has advantages of its own, hence it is used to also fix the deadlock related problem. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f08eb2d27de0..05f98e9e5c48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -175,6 +175,7 @@ enum btf_field_type { BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, + BPF_LIST_HEAD = (1 << 4), }; struct btf_field_kptr { @@ -184,11 +185,18 @@ struct btf_field_kptr { u32 btf_id; }; +struct btf_field_list_head { + struct btf *btf; + u32 value_btf_id; + u32 node_offset; +}; + struct btf_field { u32 offset; enum btf_field_type type; union { struct btf_field_kptr kptr; + struct btf_field_list_head list_head; }; }; @@ -266,6 +274,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_LIST_HEAD: + return "bpf_list_head"; default: WARN_ON_ONCE(1); return "unknown"; @@ -282,6 +292,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return sizeof(u64); + case BPF_LIST_HEAD: + return sizeof(struct bpf_list_head); default: WARN_ON_ONCE(1); return 0; @@ -298,6 +310,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return __alignof__(u64); + case BPF_LIST_HEAD: + return __alignof__(struct bpf_list_head); default: WARN_ON_ONCE(1); return 0; @@ -403,6 +417,9 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock); + int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; -- cgit From 2de2669b4e52b2ae2f118bfc310004f50b47f0f5 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:26 +0530 Subject: bpf: Rename RET_PTR_TO_ALLOC_MEM Currently, the verifier has two return types, RET_PTR_TO_ALLOC_MEM, and RET_PTR_TO_ALLOC_MEM_OR_NULL, however the former is confusingly named to imply that it carries MEM_ALLOC, while only the latter does. This causes confusion during code review leading to conclusions like that the return value of RET_PTR_TO_DYNPTR_MEM_OR_NULL (which is RET_PTR_TO_ALLOC_MEM | PTR_MAYBE_NULL) may be consumable by bpf_ringbuf_{submit,commit}. Rename it to make it clear MEM_ALLOC needs to be tacked on top of RET_PTR_TO_MEM. 
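For readers tracking the flag algebra, the composed value decomposes as shown below (sketch; base_type()/type_flag() are the masking helpers the verifier already uses for such composed types, assumed here from bpf_verifier.h):

  u32 ret = RET_PTR_TO_ALLOC_MEM_OR_NULL;
  /* == PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_MEM after this patch */

  base_type(ret);                    /* RET_PTR_TO_MEM */
  type_flag(ret) & MEM_ALLOC;        /* set: memory was "allocated" by a helper */
  type_flag(ret) & PTR_MAYBE_NULL;   /* set: caller must NULL-check */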
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05f98e9e5c48..2fe3ec620d54 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -607,7 +607,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ - RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ + RET_PTR_TO_MEM, /* returns a pointer to memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ __BPF_RET_TYPE_MAX, @@ -617,8 +617,8 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, - RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is -- cgit From 894f2a8b1673a355a1a7507a4dfa6a3c836d07c1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:27 +0530 Subject: bpf: Rename MEM_ALLOC to MEM_RINGBUF Currently, verifier uses MEM_ALLOC type tag to specially tag memory returned from bpf_ringbuf_reserve helper. However, this is currently only used for this purpose and there is an implicit assumption that it only refers to ringbuf memory (e.g. the check for ARG_PTR_TO_ALLOC_MEM in check_func_arg_reg_off). Hence, rename MEM_ALLOC to MEM_RINGBUF to indicate this special relationship and instead open the use of MEM_ALLOC for more generic allocations made for user types. Also, since ARG_PTR_TO_ALLOC_MEM_OR_NULL is unused, simply drop it. Finally, update selftests using 'alloc_' verifier string to 'ringbuf_'. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fe3ec620d54..afc1c51b59ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,10 +488,8 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - /* MEM was "allocated" from a different helper, and cannot be mixed - * with regular non-MEM_ALLOC'ed MEM types. - */ - MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + /* MEM points to BPF ring buffer reservation. */ + MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS), /* MEM is in user address space. 
*/ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), @@ -565,7 +563,7 @@ enum bpf_arg_type { ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ - ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ + ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ @@ -582,7 +580,6 @@ enum bpf_arg_type { ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, - ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* pointer to memory does not need to be initialized, helper function must fill @@ -617,7 +614,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_MEM, + RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, -- cgit From 6728aea7216c0c06c98e2e58d753a5e8b2ae1c6f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:28 +0530 Subject: bpf: Refactor btf_struct_access Instead of having to pass multiple arguments that describe the register, pass the bpf_reg_state into the btf_struct_access callback. Currently, all call sites simply reuse the btf and btf_id of the reg they want to check the access of. The only exception to this pattern is the callsite in check_ptr_to_map_access, hence for that case create a dummy reg to simulate PTR_TO_BTF_ID access. 
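The dummy-reg construction mentioned above amounts to roughly the following (simplified sketch, not the exact verifier code; the callback signature matches the hunks below):

  struct bpf_reg_state map_reg;

  memset(&map_reg, 0, sizeof(map_reg));
  map_reg.type = PTR_TO_BTF_ID;       /* simulate a BTF pointer access */
  map_reg.btf = btf;                  /* BTF containing the map's value type */
  map_reg.btf_id = btf_id;
  ret = btf_struct_access(&env->log, &map_reg, off, size, atype,
                          &next_btf_id, &flag);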
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 17 ++++++++--------- include/linux/filter.h | 8 ++++---- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index afc1c51b59ff..49f9d2bec401 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -771,6 +771,7 @@ struct bpf_prog_ops { union bpf_attr __user *uattr); }; +struct bpf_reg_state; struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto * @@ -792,9 +793,8 @@ struct bpf_verifier_ops { struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); }; @@ -2080,9 +2080,9 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, return btf_ctx_access(off, size, type, prog, info); } -int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, +int btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, @@ -2333,9 +2333,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) } static inline int btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag) { return -EACCES; diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed..787d35dbf5b0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -568,10 +568,10 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; -extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, -- cgit From 14d898f3c1b3bf9c4375ee3255ec9e9b89a35578 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 8 Nov 2022 15:05:59 +0100 Subject: dev: Move received_rps counter next to RPS members in softnet data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the received_rps counter value next to the other RPS-related members in softnet_data. This closes two four-byte holes in the structure, making room for another pointer in the first two cache lines without bumping the xmit struct to its own line. 
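A convenient way to verify this sort of layout claim is pahole against the built kernel (illustrative command; exact offsets depend on the kernel config):

  $ pahole -C softnet_data vmlinux | grep -E 'hole|cacheline'

Run before and after the change, the two 4-byte holes in the first cache lines should disappear.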
Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221108140601.149971-2-toke@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02a2318da7c7..ddc59ef98500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3128,7 +3128,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif @@ -3161,6 +3160,7 @@ struct softnet_data { unsigned int cpu; unsigned int input_queue_tail; #endif + unsigned int received_rps; unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; -- cgit From 32637e33003f36e75e9147788cc0e2f21706ef99 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 8 Nov 2022 15:06:00 +0100 Subject: bpf: Expand map key argument of bpf_redirect_map to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For queueing packets in XDP we want to add a new redirect map type with support for 64-bit indexes. To prepare for this, expand the width of the 'key' argument to the bpf_redirect_map() helper. Since BPF registers are always 64-bit, this should be safe to do after the fact. Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221108140601.149971-3-toke@redhat.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- include/linux/filter.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49f9d2bec401..54462dd28824 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -135,7 +135,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map.
It is a runtime check to ensure diff --git a/include/linux/filter.h b/include/linux/filter.h index 787d35dbf5b0..bf701976056e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -643,13 +643,13 @@ struct bpf_nh_params { }; struct bpf_redirect_info { - u32 flags; - u32 tgt_index; + u64 tgt_index; void *tgt_value; struct bpf_map *map; + u32 flags; + u32 kern_flags; u32 map_id; enum bpf_map_type map_type; - u32 kern_flags; struct bpf_nh_params nh; }; @@ -1504,7 +1504,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { @@ -1515,7 +1515,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind if (unlikely(flags & ~(action_mask | flag_mask))) return XDP_ABORTED; - ri->tgt_value = lookup_elem(map, ifindex); + ri->tgt_value = lookup_elem(map, index); if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) { /* If the lookup fails we want to clear out the state in the * redirect_info struct completely, so that if an eBPF program @@ -1527,7 +1527,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind return flags & action_mask; } - ri->tgt_index = ifindex; + ri->tgt_index = index; ri->map_id = map->id; ri->map_type = map->map_type; -- cgit From 06463f6e98df34908c26aa8e7a31a279646b1f51 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 9 Nov 2022 14:42:49 -0800 Subject: wifi: wl1251: drop support for platform data Remove support for configuring the device via platform data because there are no users of wl1251_platform_data left in the mainline kernel. Signed-off-by: Dmitry Torokhov Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221109224250.2885119-2-dmitry.torokhov@gmail.com --- include/linux/wl12xx.h | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 include/linux/wl12xx.h (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h deleted file mode 100644 index 03d61f1d23ab..000000000000 --- a/include/linux/wl12xx.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * This file is part of wl12xx - * - * Copyright (C) 2009 Nokia Corporation - * - * Contact: Luciano Coelho - */ - -#ifndef _LINUX_WL12XX_H -#define _LINUX_WL12XX_H - -#include - -struct wl1251_platform_data { - int power_gpio; - /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ - int irq; - bool use_eeprom; -}; - -#ifdef CONFIG_WILINK_PLATFORM_DATA - -int wl1251_set_platform_data(const struct wl1251_platform_data *data); - -struct wl1251_platform_data *wl1251_get_platform_data(void); - -#else - -static inline -int wl1251_set_platform_data(const struct wl1251_platform_data *data) -{ - return -ENOSYS; -} - -static inline -struct wl1251_platform_data *wl1251_get_platform_data(void) -{ - return ERR_PTR(-ENODATA); -} - -#endif - -#endif -- cgit From 9804985bf27f8fbcf0d96c7435b5ad94a2a6ea20 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Nov 2022 13:57:57 -0800 Subject: udp: Introduce optional per-netns hash table. The maximum hash table size is 64K due to the nature of the protocol. [0] It's smaller than TCP, and fewer sockets can cause a performance drop. 
On an EC2 c5.24xlarge instance (192 GiB memory), after running iperf3 in different netns, creating 32Mi sockets without data transfer in the root netns causes a regression for iperf3's connection.

  uhash_entries  sockets  length  Gbps
            64K        1       1  5.69
                     1Mi      16  5.27
                     2Mi      32  4.90
                     4Mi      64  4.09
                     8Mi     128  2.96
                    16Mi     256  2.06
                    32Mi     512  1.12

The per-netns hash table breaks the lengthy lists into shorter ones. It is useful on a multi-tenant system with thousands of netns. With smaller hash tables, we can look up sockets faster, isolate noisy neighbours, and reduce lock contention. The max size of the per-netns table is 64K as well. This is because the possible hash range by udp_hashfn() always fits in 64K within the same netns, and we cannot make full use of buckets beyond 64K:

  /* 0 < num < 64K -> X < hash < X + 64K */
  (num + net_hash_mix(net)) & mask;

Also, the min size is 128. We use a bitmap to search for an available port in udp_lib_get_port(). To keep the bitmap on the stack and not fire the CONFIG_FRAME_WARN error at build time, we round up the table size to 128. The sysctl usage is the same as with TCP:

  $ dmesg | cut -d ' ' -f 6- | grep "UDP hash"
  UDP hash table entries: 65536 (order: 9, 2097152 bytes, vmalloc)

  # sysctl net.ipv4.udp_hash_entries
  net.ipv4.udp_hash_entries = 65536  # can be changed by uhash_entries

  # sysctl net.ipv4.udp_child_hash_entries
  net.ipv4.udp_child_hash_entries = 0  # disabled by default

  # ip netns add test1
  # ip netns exec test1 sysctl net.ipv4.udp_hash_entries
  net.ipv4.udp_hash_entries = -65536  # share the global table

  # sysctl -w net.ipv4.udp_child_hash_entries=100
  net.ipv4.udp_child_hash_entries = 100

  # ip netns add test2
  # ip netns exec test2 sysctl net.ipv4.udp_hash_entries
  net.ipv4.udp_hash_entries = 128  # own a per-netns table with 2^n buckets

We could optimise the hash table lookup/iteration further by removing the netns comparison for the per-netns one in the future. Also, we could optimise the sparse udp_hslot layout by putting it in udp_table. [0]: https://lore.kernel.org/netdev/4ACC2815.7010101@gmail.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/udp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index dea57aa37df6..a2892e151644 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } +#define UDP_HTABLE_SIZE_MIN_PERNET 128 #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { -- cgit From 6c1c5097781f563b70a81683ea6fdac21637573b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2022 08:53:55 +0000 Subject: net: add atomic_long_t to net_device_stats fields Long standing KCSAN issues are caused by data races around some dev->stats changes. Most performance critical paths already use per-cpu variables, or per-queue ones. It is reasonable (and more correct) to use atomic operations for the slow paths. This patch adds a union for each field of net_device_stats, so that we can convert paths that are not yet protected by a spinlock or a mutex. netdev_stats_to_stats64() no longer has an #if BITS_PER_LONG==64. Note that the memcpy() we were using on 64bit arches had no provision to avoid load-tearing, while atomic_long_read() provides the needed protection at no cost.
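Slow-path users then update counters via the new macros (sketch of a driver call site; DEV_STATS_INC/DEV_STATS_ADD are added at the end of the hunk below):

  /* instead of dev->stats.rx_dropped++, which KCSAN flags as a data race: */
  DEV_STATS_INC(dev, rx_dropped);
  DEV_STATS_ADD(dev, rx_bytes, skb->len);

Readers such as netdev_stats_to_stats64() then use atomic_long_read() on the __-prefixed union member, so both sides of the former race are covered.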
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 58 ++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02a2318da7c7..23b3903b0678 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -171,31 +171,38 @@ static inline bool dev_xmit_complete(int rc) * (unsigned long) so they can be read and written atomically. */ +#define NET_DEV_STAT(FIELD) \ + union { \ + unsigned long FIELD; \ + atomic_long_t __##FIELD; \ + } + struct net_device_stats { - unsigned long rx_packets; - unsigned long tx_packets; - unsigned long rx_bytes; - unsigned long tx_bytes; - unsigned long rx_errors; - unsigned long tx_errors; - unsigned long rx_dropped; - unsigned long tx_dropped; - unsigned long multicast; - unsigned long collisions; - unsigned long rx_length_errors; - unsigned long rx_over_errors; - unsigned long rx_crc_errors; - unsigned long rx_frame_errors; - unsigned long rx_fifo_errors; - unsigned long rx_missed_errors; - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - unsigned long rx_compressed; - unsigned long tx_compressed; + NET_DEV_STAT(rx_packets); + NET_DEV_STAT(tx_packets); + NET_DEV_STAT(rx_bytes); + NET_DEV_STAT(tx_bytes); + NET_DEV_STAT(rx_errors); + NET_DEV_STAT(tx_errors); + NET_DEV_STAT(rx_dropped); + NET_DEV_STAT(tx_dropped); + NET_DEV_STAT(multicast); + NET_DEV_STAT(collisions); + NET_DEV_STAT(rx_length_errors); + NET_DEV_STAT(rx_over_errors); + NET_DEV_STAT(rx_crc_errors); + NET_DEV_STAT(rx_frame_errors); + NET_DEV_STAT(rx_fifo_errors); + NET_DEV_STAT(rx_missed_errors); + NET_DEV_STAT(tx_aborted_errors); + NET_DEV_STAT(tx_carrier_errors); + NET_DEV_STAT(tx_fifo_errors); + NET_DEV_STAT(tx_heartbeat_errors); + NET_DEV_STAT(tx_window_errors); + NET_DEV_STAT(rx_compressed); + NET_DEV_STAT(tx_compressed); }; +#undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. @@ -5171,4 +5178,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; +/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ +#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) +#define DEV_STATS_ADD(DEV, FIELD, VAL) \ + atomic_long_add((VAL), &(DEV)->stats.__##FIELD) + #endif /* _LINUX_NETDEVICE_H */ -- cgit From 3af43ba4c6019b29c048921eb8147eb010165329 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:50:58 +0800 Subject: bpf: Pass map file to .map_update_batch directly Currently bpf_map_do_batch() first invokes fdget(batch.map_fd) to get the target map file, then it invokes generic_map_update_batch() to do batch update. generic_map_update_batch() will get the target map file by using fdget(batch.map_fd) again and pass it to bpf_map_update_value(). The problem is that the map file returned by the second fdget() may be NULL or a totally different file compared with the map file in bpf_map_do_batch(). The reason is that the first fdget() only guarantees the liveness of the struct file instead of the file descriptor, and the file descriptor may be released by a concurrent close() through pick_file(). It doesn't incur any problem for now, because maps with batch update support don't use the map file in their .map_fd_get_ptr() ops. But it is better to fix the potential access of an invalid map file. Using __bpf_map_get() again in generic_map_update_batch() cannot fix the problem, because batch.map_fd may be closed and reopened, and the returned map file may be different from the map file obtained in bpf_map_do_batch(), so just pass the map file directly to .map_update_batch() in bpf_map_do_batch(). Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221116075059.1551277-1-houtao@huaweicloud.com --- include/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 54462dd28824..e60a5c052473 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -85,7 +85,8 @@ struct bpf_map_ops { int (*map_lookup_and_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); - int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, + int (*map_update_batch)(struct bpf_map *map, struct file *map_file, + const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -1789,7 +1790,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); -int generic_map_update_batch(struct bpf_map *map, +int generic_map_update_batch(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr); int generic_map_delete_batch(struct bpf_map *map, -- cgit From 282de143ead96a5d53331e946f31c977b4610a74 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:55 +0530 Subject: bpf: Introduce allocated objects support Introduce support for representing pointers to objects allocated by the BPF program, i.e. PTR_TO_BTF_ID that point to a type in program BTF. This is indicated by the presence of the MEM_ALLOC type flag in reg->type, to avoid having to check btf_is_kernel when trying to match argument types in helpers. Whenever walking such types, any pointers being walked will always yield a SCALAR instead of a pointer. In the future we might permit kptr inside such allocated objects (either kernel or program allocated), and it will then form a PTR_TO_BTF_ID of the respective type. For now, such allocated objects will always be referenced in verifier context, hence ref_obj_id == 0 for them is a bug. It is allowed to write to such objects, as long as fields that are special are not touched (support for which will be added in subsequent patches). Note that once such a pointer is marked PTR_UNTRUSTED, it is no longer allowed to write to it. No PROBE_MEM handling is therefore done for loads into this type unless PTR_UNTRUSTED is part of the register type, since they can never be in an undefined state, and their lifetime will always be valid. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e60a5c052473..7440c20c4192 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -525,6 +525,11 @@ enum bpf_type_flag { /* Size is known at compile time.
*/ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + /* MEM is of an allocated object of type in program BTF. This is used to + * tag PTR_TO_BTF_ID allocated using bpf_obj_new. + */ + MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -2792,4 +2797,10 @@ struct bpf_key { bool has_ref; }; #endif /* CONFIG_KEYS */ + +static inline bool type_is_alloc(u32 type) +{ + return type & MEM_ALLOC; +} + #endif /* _LINUX_BPF_H */ -- cgit From 8ffa5cc142137a59d6a10eb5273fa2ba5dcd4947 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:56 +0530 Subject: bpf: Recognize lock and list fields in allocated objects Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an allocated object. Also update btf_struct_access to reject direct access to these special fields. A bpf_list_head allows implementing map-in-map style use cases, where an allocated object with a bpf_list_head is linked into a list in a map value. This would require embedding a bpf_list_node, support for which is also included. The bpf_spin_lock is used to protect the bpf_list_head and other data. While we strictly don't require holding a bpf_spin_lock while touching the bpf_list_head in such objects (as when we have access to it, we have complete ownership of the object), the locking constraint is still kept and may be conditionally lifted in the future. Note that the specification of such types can be done just like map values, e.g.:

  struct bar {
          struct bpf_list_node node;
  };

  struct foo {
          struct bpf_spin_lock lock;
          struct bpf_list_head head __contains(bar, node);
          struct bpf_list_node node;
  };

  struct map_value {
          struct bpf_spin_lock lock;
          struct bpf_list_head head __contains(foo, node);
  };

To recognize such types in user BTF, we build a btf_struct_metas array of metadata items corresponding to each BTF ID. This is done once during the btf_parse stage to avoid having to redo it each time the verification process needs to inspect the metadata. Moreover, the computed metadata needs to be passed to some helpers in future patches, which requires allocating them and storing them in the BTF that is pinned by the program itself, so that valid access can be assumed to such data during program runtime. A key thing to note is that once a btf_struct_meta is available for a type, both the btf_record and btf_field_offs should be available. It is critical that btf_field_offs is available in case special fields are present, as we extensively rely on special fields being zeroed out in map values and allocated objects in later patches. The code ensures that by bailing out in case of errors and ensuring both are available together. If the record is not available, the special fields won't be recognized, so not having both is also fine (in terms of being a verification error and not a runtime bug).
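A later consumer fetches this metadata by BTF ID along these lines (sketch; btf_find_struct_meta() is declared in the hunk below):

  struct btf_struct_meta *meta;

  meta = btf_find_struct_meta(btf, btf_id);
  if (!meta) {
          /* type has no special fields */
  }
  /* meta->record lists the special fields (bpf_spin_lock, bpf_list_head, ...)
   * and meta->field_offs describes which byte ranges may be copied/zeroed.
   */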
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ include/linux/btf.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7440c20c4192..eb6ea53fa5a2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -177,6 +177,7 @@ enum btf_field_type { BPF_KPTR_REF = (1 << 3), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, BPF_LIST_HEAD = (1 << 4), + BPF_LIST_NODE = (1 << 5), }; struct btf_field_kptr { @@ -277,6 +278,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; + case BPF_LIST_NODE: + return "bpf_list_node"; default: WARN_ON_ONCE(1); return "unknown"; @@ -295,6 +298,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); + case BPF_LIST_NODE: + return sizeof(struct bpf_list_node); default: WARN_ON_ONCE(1); return 0; @@ -313,6 +318,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); + case BPF_LIST_NODE: + return __alignof__(struct bpf_list_node); default: WARN_ON_ONCE(1); return 0; diff --git a/include/linux/btf.h b/include/linux/btf.h index d80345fa566b..a01a8da20021 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -78,6 +80,17 @@ struct btf_id_dtor_kfunc { u32 kfunc_btf_id; }; +struct btf_struct_meta { + u32 btf_id; + struct btf_record *record; + struct btf_field_offs *field_offs; +}; + +struct btf_struct_metas { + u32 cnt; + struct btf_struct_meta types[]; +}; + typedef void (*btf_dtor_kfunc_t)(void *); extern const struct file_operations btf_fops; @@ -408,6 +421,23 @@ static inline struct btf_param *btf_params(const struct btf_type *t) return (struct btf_param *)(t + 1); } +static inline int btf_id_cmp_func(const void *a, const void *b) +{ + const int *pa = a, *pb = b; + + return *pa - *pb; +} + +static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id) +{ + return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; +} + +static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + #ifdef CONFIG_BPF_SYSCALL struct bpf_prog; @@ -423,6 +453,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -454,6 +485,10 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt { return 0; } +static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + return NULL; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) -- cgit From 865ce09a49d79d2b2c1d980f4c05ffc0b3517bdc Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:57 +0530 Subject: bpf: Verify ownership relationships 
for user BTF types Ensure that there can be no ownership cycles among different types by way of having owning objects that can hold some other type as their element. For instance, a map value can only hold allocated objects, but these are allowed to have another bpf_list_head. To prevent unbounded recursion while freeing resources, elements of bpf_list_head in local kptrs can never have a bpf_list_head which are part of list in a map value. Later patches will verify this by having dedicated BTF selftests. Also, to make runtime destruction easier, once btf_struct_metas is fully populated, we can stash the metadata of the value type directly in the metadata of the list_head fields, as that allows easier access to the value type's layout to destruct it at runtime from the btf_field entry of the list head itself. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/btf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eb6ea53fa5a2..323985a39ece 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -191,6 +191,7 @@ struct btf_field_list_head { struct btf *btf; u32 value_btf_id; u32 node_offset; + struct btf_record *value_rec; }; struct btf_field { diff --git a/include/linux/btf.h b/include/linux/btf.h index a01a8da20021..42d8f3730a8d 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -178,6 +178,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); -- cgit From d0d78c1df9b1dbfb5e172de473561ce09d5e9d39 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:59 +0530 Subject: bpf: Allow locking bpf_spin_lock global variables Global variables reside in maps accessible using direct_value_addr callbacks, so giving each load instruction's rewrite a unique reg->id disallows us from holding locks which are global. The reason for preserving reg->id as a unique value for registers that may point to spin lock is that two separate lookups are treated as two separate memory regions, and any possible aliasing is ignored for the purposes of spin lock correctness. This is not great especially for the global variable case, which are served from maps that have max_entries == 1, i.e. they always lead to map values pointing into the same map value. So refactor the active_spin_lock into a 'active_lock' structure which represents the lock identity, and instead of the reg->id, remember two fields, a pointer and the reg->id. The pointer will store reg->map_ptr or reg->btf. It's only necessary to distinguish for the id == 0 case of global variables, but always setting the pointer to a non-NULL value and using the pointer to check whether the lock is held simplifies code in the verifier. This is generic enough to allow it for global variables, map lookups, and allocated objects at the same time. 
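In sketch form, the lock-identity checks then reduce to comparisons on the new pair (illustrative, not the verifier code verbatim):

  void *ptr = (reg->type == PTR_TO_MAP_VALUE) ? (void *)reg->map_ptr
                                              : (void *)reg->btf;

  if (cur->active_lock.ptr != ptr || cur->active_lock.id != reg->id) {
          /* bpf_spin_unlock of a different lock than the one held */
  }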
Note that while whether a lock is held can be answered by just comparing active_lock.ptr to NULL, to determine whether the register is pointing to the same held lock requires comparing _both_ ptr and id. Finally, as a result of this refactoring, pseudo load instructions are not given a unique reg->id, as they are doing a lookup for the same map value (max_entries is never greater than 1). Essentially, we consider that the tuple of (ptr, id) will always be unique for any kind of argument to bpf_spin_{lock,unlock}. Note that this can be extended in the future to also remember the offset used for locking, so that we can introduce multiple bpf_spin_lock fields in the same allocation. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-10-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1a32baa78ce2..1db2b4dc7009 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -323,7 +323,21 @@ struct bpf_verifier_state { u32 branches; u32 insn_idx; u32 curframe; - u32 active_spin_lock; + /* For every reg representing a map value or allocated object pointer, + * we consider the tuple of (ptr, id) for them to be unique in verifier + * context and consider them to not alias each other for the purposes of + * tracking lock state. + */ + struct { + /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, + * there's no active lock held, and other fields have no + * meaning. If non-NULL, it indicates that a lock is held and + * id member has the reg->id of the register which can be >= 0. + */ + void *ptr; + /* This will be reg->id */ + u32 id; + } active_lock; bool speculative; /* first and last insn idx of this verifier state */ -- cgit From 00b85860feb809852af9a88cb4ca8766d7dff6a3 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:01 +0530 Subject: bpf: Rewrite kfunc argument handling As we continue to add more features, argument types, kfunc flags, and different extensions to kfuncs, the code to verify the correctness of the kfunc prototype w.r.t. the passed-in registers has become ad-hoc and ugly to read. To make life easier, and make a very clear split between different stages of argument processing, move all the code into verifier.c and refactor it into easier-to-read helpers and functions. This also makes it easier for kfunc argument processing to share code within the verifier. This will be more and more useful in later patches as we are now moving to implement very core BPF helpers as kfuncs, to keep them experimental before baking them into UAPI. Remove all kfunc-related bits now from btf_check_func_arg_match, as users have been converted away to refactored kfunc argument handling.
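The shape of what the refactored path verifies, as a hedged sketch (the kfunc and its flags are illustrative, not part of this patch; registration via register_btf_kfunc_id_set() is omitted): the verifier matches each argument register against the kfunc's BTF prototype plus the flags attached at registration time, e.g.

    /* kernel-side prototype; each register the program passes is
     * type-checked against these parameter types
     */
    struct task_struct *bpf_task_acquire(struct task_struct *p);

    BTF_SET8_START(example_kfunc_ids)
    BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
    BTF_SET8_END(example_kfunc_ids)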
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-12-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ----------- include/linux/bpf_verifier.h | 2 -- include/linux/btf.h | 31 ++++++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 323985a39ece..0a74df731eb8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2109,22 +2109,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); -struct bpf_kfunc_arg_meta { - u64 r0_size; - bool r0_rdonly; - int ref_obj_id; - u32 flags; -}; - struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1db2b4dc7009..fb146b0ce006 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -603,8 +603,6 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, enum bpf_arg_type arg_type); -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, diff --git a/include/linux/btf.h b/include/linux/btf.h index 42d8f3730a8d..d5b26380a60f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -338,6 +338,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static inline bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + +static inline bool btf_type_is_array(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -439,9 +449,10 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } -#ifdef CONFIG_BPF_SYSCALL struct bpf_prog; +struct bpf_verifier_log; +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); @@ -455,6 +466,12 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const 
struct btf *btf2, u32 id2); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -490,6 +507,18 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } +static inline const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + return NULL; +} +static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) +{ + return false; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) -- cgit From 958cf2e273f0929c66169e0788031310e8118722 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:03 +0530 Subject: bpf: Introduce bpf_obj_new Introduce the type-safe memory allocator bpf_obj_new for BPF programs. The kernel side kfunc is named bpf_obj_new_impl, as passing hidden arguments to kfuncs still requires having them in the prototype, unlike BPF helpers which always take 5 arguments and have them checked using bpf_func_proto in the verifier, ignoring unset argument types. Introduce the __ign suffix to ignore a specific kfunc argument during type checks, then use this to introduce support for passing type metadata to the bpf_obj_new_impl kfunc. The user passes the BTF ID of the type it wants to allocate in program BTF, the verifier then rewrites the first argument as the size of this type, after performing some sanity checks (to ensure it exists and it is a struct type). The second argument is also fixed up and passed by the verifier. This is the btf_struct_meta for the type being allocated. It would be needed mostly for the offset array which is required for zero-initializing special fields while leaving the rest of the storage uninitialized. It would also be needed in the next patch to perform proper destruction of the object's special fields. Under the hood, bpf_obj_new will call bpf_mem_alloc and bpf_mem_free, using the any-context BPF memory allocator introduced recently. To this end, a global instance of the BPF memory allocator is initialized on boot to be used for this purpose. This 'bpf_global_ma' serves all allocations for bpf_obj_new. In the future, bpf_obj_new variants will allow specifying a custom allocator. Note that now that bpf_obj_new can be used to allocate objects that can be linked to a BPF linked list (when future linked list helpers are available), we need to also free the elements using bpf_mem_free. However, since the draining of elements is done outside the bpf_spin_lock, we need to do migrate_disable around the call since bpf_list_head_free can be called from the map free path where migration is enabled. Otherwise, when called from BPF programs migration is already disabled. A convenience macro is included in the bpf_experimental.h header to hide the ugly details of the implementation, leading to user code looking similar to a language-level extension which allocates and constructs fields of a user type. struct bar { struct bpf_list_node node; }; struct foo { struct bpf_spin_lock lock; struct bpf_list_head head __contains(bar, node); }; void prog(void) { struct foo *f; f = bpf_obj_new(typeof(*f)); if (!f) return; ... } A key piece of this story is still missing, i.e. the free function, which will come in the next patch.
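For reference, the pieces behind the macro look roughly like this (a sketch following the __k/__ign conventions described above; consult bpf_experimental.h for the authoritative definition):

    /* kernel kfunc: the verifier rewrites the first argument to the
     * type's size and supplies the btf_struct_meta as the hidden,
     * program-invisible second argument
     */
    void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign);

    /* user-facing convenience wrapper */
    #define bpf_obj_new(type) \
        ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))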
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-14-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 21 +++++++++++++-------- include/linux/bpf_verifier.h | 2 ++ 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0a74df731eb8..8b32376ce746 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -54,6 +54,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; +extern struct bpf_mem_alloc bpf_global_ma; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -334,16 +336,19 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) { - if (!IS_ERR_OR_NULL(map->record)) { - struct btf_field *fields = map->record->fields; - u32 cnt = map->record->cnt; - int i; + int i; - for (i = 0; i < cnt; i++) - memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type)); - } + if (!foffs) + return; + for (i = 0; i < foffs->cnt; i++) + memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); +} + +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_init(map->field_offs, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index fb146b0ce006..3dc72d396dfc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -433,6 +433,8 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; + u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ -- cgit From 534e86bc6c66e1e0c798a1c0a6a680bb231c08db Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:07 +0530 Subject: bpf: Add 'release on unlock' logic for bpf_list_push_{front,back} This commit implements the delayed release logic for bpf_list_push_front and bpf_list_push_back. Once a node has been added to the list, its pointer changes to PTR_UNTRUSTED. However, it is only released once the lock protecting the list is unlocked. For such PTR_TO_BTF_ID | MEM_ALLOC pointers with PTR_UNTRUSTED set but an active ref_obj_id, it is still permitted to read them as long as the lock is held. Writing to them is not allowed. This allows having read access to pushed items we no longer own until we release the lock guarding the list, allowing a little more flexibility when working with these APIs. Note that enabling write support has fairly tricky interactions with what happens inside the critical section. Just as an example, currently, bpf_obj_drop is not permitted, but if it were, being able to write to the PTR_UNTRUSTED pointer while the object gets released back to the memory allocator would violate safety properties we wish to guarantee (i.e. not crashing the kernel).
The memory could be reused for a different type in the BPF program or even in the kernel as it eventually gets kfree'd. Not enabling bpf_obj_drop inside the critical section would appear to prevent all of the above, but that is more of an artificial limitation right now. Since the write support is tangled with how we handle potential aliasing of nodes inside the critical section that may or may not be part of the list anymore, it has been deferred to a future patch. Acked-by: Dave Marchevsky Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-18-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3dc72d396dfc..23f30c685f28 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -223,6 +223,11 @@ struct bpf_reference_state { * exiting a callback function. */ int callback_ref; + /* Mark the reference state to release the registers sharing the same id + * on bpf_spin_unlock (for nodes that we will lose ownership to but are + * safe to access inside the critical section). + */ + bool release_on_unlock; }; /* state of the program: -- cgit From f20a0a0519f35a3a34236ed2d38b067c5cb22b9f Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 17 Nov 2022 02:18:28 +0900 Subject: ethtool: doc: clarify what drivers can implement in their get_drvinfo() Many of the drivers which implement ethtool_ops::get_drvinfo() fill in the .driver, .version or .bus_info fields of struct ethtool_drvinfo. To get a glance of the current state, run: $ git grep -W "get_drvinfo(struct" Filling in those three fields is useless because: - since [1], the driver version should be the kernel version (at least for upstream drivers). Arguably, out-of-tree drivers might still want to set a custom version, but out-of-tree is not our focus. - since [2], the core is able to provide default values for .driver and .bus_info. In summary, drivers may provide .fw_version and .erom_version, the rest is expected to be done by the core. In the struct ethtool_ops doc from linux/ethtool.h: rephrase the get_drvinfo() field doc to discourage developers from implementing this callback. In the struct ethtool_drvinfo doc from uapi/linux/ethtool.h: remove the paragraph mentioning what drivers should do. Rationale: no need to repeat what is already written in the struct ethtool_ops doc. But add a note that .fw_version and .erom_version are driver-defined. Also update the dummy driver and simply remove the callback in order not to confuse newcomers: most of the drivers will not need this callback function any more.
[1] commit 6a7e25c7fb48 ("net/core: Replace driver version to be kernel version") Link: https://git.kernel.org/torvalds/linux/c/6a7e25c7fb48 [2] commit edaf5df22cb8 ("ethtool: ethtool_get_drvinfo: populate drvinfo fields even if callback exits") Link: https://git.kernel.org/netdev/net-next/c/edaf5df22cb8 Reviewed-by: Leon Romanovsky Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20221116171828.4093-1-mailhol.vincent@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 5c51c7fda32a..9e0a76fc7de9 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -473,10 +473,10 @@ struct ethtool_module_power_mode_params { * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. - * @get_drvinfo: Report driver/device information. Should only set the - * @driver, @version, @fw_version and @bus_info fields. If not - * implemented, the @driver and @bus_info fields will be filled in - * according to the netdev's parent device. + * @get_drvinfo: Report driver/device information. Modern drivers no + * longer have to implement this callback. Most fields are + * correctly filled in by the core using system information, or + * populated using other driver operations. * @get_regs_len: Get buffer length required for @get_regs * @get_regs: Get device registers * @get_wol: Report whether Wake-on-Lan is enabled -- cgit From 647541ea06a7bbbcf941e501c726b3e26328c102 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 15 Nov 2022 10:40:21 -0500 Subject: sctp: move SCTP_PAD4 and SCTP_TRUNC4 to linux/sctp.h Move these two macros from net/sctp/sctp.h to linux/sctp.h, so that it will be enough to include only linux/sctp.h in nft_exthdr.c and xt_sctp.c. It should not include "net/sctp/sctp.h" if a module does not have a dependence on SCTP module. Signed-off-by: Xin Long Reviewed-by: Saeed Mahameed Link: https://lore.kernel.org/r/ef6468a687f36da06f575c2131cd4612f6b7be88.1668526821.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/sctp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a86e852507b3..358dc08e0831 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -820,4 +820,9 @@ struct sctp_new_encap_port_hdr { __be16 new_port; }; +/* Round an int up to the next multiple of 4. */ +#define SCTP_PAD4(s) (((s)+3)&~3) +/* Truncate to the previous multiple of 4. */ +#define SCTP_TRUNC4(s) ((s)&~3) + #endif /* __LINUX_SCTP_H__ */ -- cgit From ef66c5475d7fb864c2418d3bdd19dee46324624b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 19 Nov 2022 23:10:01 -0600 Subject: bpf: Allow multiple modifiers in reg_type_str() prefix reg_type_str() in the verifier currently only allows a single register type modifier to be present in the 'prefix' string which is eventually stored in the env type_str_buf. This currently works fine because there are no overlapping type modifiers, but once PTR_TRUSTED is added, that will no longer be the case. This patch updates reg_type_str() to support having multiple modifiers in the prefix string, and updates the size of type_str_buf to be 128 bytes. 
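Conceptually, the prefix is now built by appending one token per modifier bit that is set, rather than picking a single one, along these lines (a loose sketch, not the literal implementation):

    char prefix[TYPE_STR_BUF_LEN] = "";

    if (type & MEM_RDONLY)
        strncat(prefix, "rdonly_", sizeof(prefix) - strlen(prefix) - 1);
    if (type & MEM_ALLOC)
        strncat(prefix, "alloc_", sizeof(prefix) - strlen(prefix) - 1);
    /* ...one append per modifier, including the upcoming PTR_TRUSTED... */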
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221120051004.3605026-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 23f30c685f28..608dde740fef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -19,7 +19,7 @@ */ #define BPF_MAX_VAR_SIZ (1 << 29) /* size of type_str_buf in bpf_verifier. */ -#define TYPE_STR_BUF_LEN 64 +#define TYPE_STR_BUF_LEN 128 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that -- cgit From 3f00c52393445ed49aadc1a567aa502c6333b1a1 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 19 Nov 2022 23:10:02 -0600 Subject: bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs Kfuncs currently support specifying the KF_TRUSTED_ARGS flag to signal to the verifier that it should enforce that a BPF program passes it a "safe", trusted pointer. Currently, "safe" means that the pointer is either PTR_TO_CTX, or is refcounted. There may be cases, however, where the kernel passes a BPF program a safe / trusted pointer to an object that the BPF program wishes to use as a kptr, but because the object does not yet have a ref_obj_id from the perspective of the verifier, the program would be unable to pass it to a KF_ACQUIRE | KF_TRUSTED_ARGS kfunc. The solution is to expand the set of pointers that are considered trusted according to KF_TRUSTED_ARGS, so that programs can invoke kfuncs with these pointers without getting rejected by the verifier. There is already a PTR_UNTRUSTED flag that is set in some scenarios, such as when a BPF program reads a kptr directly from a map without performing a bpf_kptr_xchg() call. These pointers of course can and should be rejected by the verifier. Unfortunately, however, PTR_UNTRUSTED does not cover all the cases for safety that need to be addressed to adequately protect kfuncs. Specifically, pointers obtained by a BPF program "walking" a struct are _not_ considered PTR_UNTRUSTED according to BPF. For example, say that we were to add a kfunc called bpf_task_acquire(), with KF_ACQUIRE | KF_TRUSTED_ARGS, to acquire a struct task_struct *. If we only used PTR_UNTRUSTED to signal that a task was unsafe to pass to a kfunc, the verifier would mistakenly allow the following unsafe BPF program to be loaded: SEC("tp_btf/task_newtask") int BPF_PROG(unsafe_acquire_task, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired, *nested; nested = task->last_wakee; /* Would not be rejected by the verifier. */ acquired = bpf_task_acquire(nested); if (!acquired) return 0; bpf_task_release(acquired); return 0; } To address this, this patch defines a new type flag called PTR_TRUSTED which tracks whether a PTR_TO_BTF_ID pointer is safe to pass to a KF_TRUSTED_ARGS kfunc or a BPF helper function. PTR_TRUSTED pointers are passed directly from the kernel as a tracepoint or struct_ops callback argument. Any nested pointer that is obtained from walking a PTR_TRUSTED pointer is no longer PTR_TRUSTED. From the example above, the struct task_struct *task argument is PTR_TRUSTED, but the 'nested' pointer obtained from 'task->last_wakee' is not PTR_TRUSTED. A subsequent patch will add kfuncs for storing a task kfunc as a kptr, and then another patch will add selftests to validate. 
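For contrast with the rejected program above, passing the unwalked, trusted argument is accepted (a sketch assuming the bpf_task_acquire()/bpf_task_release() kfuncs that later patches in this series add):

    SEC("tp_btf/task_newtask")
    int BPF_PROG(safe_acquire_task, struct task_struct *task, u64 clone_flags)
    {
        struct task_struct *acquired;

        /* task is PTR_TRUSTED: passed directly by the kernel */
        acquired = bpf_task_acquire(task);
        if (!acquired)
            return 0;

        bpf_task_release(acquired);
        return 0;
    }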
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221120051004.3605026-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 30 ++++++++++++++++++++ include/linux/bpf_verifier.h | 7 +++++ include/linux/btf.h | 65 ++++++++++++++++++++++++++++---------------- 3 files changed, 78 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8b32376ce746..c9eafa67f2a2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -543,6 +543,35 @@ enum bpf_type_flag { */ MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), + /* PTR was passed from the kernel in a trusted context, and may be + * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. + * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. + * PTR_UNTRUSTED refers to a kptr that was read directly from a map + * without invoking bpf_kptr_xchg(). What we really need to know is + * whether a pointer is safe to pass to a kfunc or BPF helper function. + * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF + * helpers, they do not cover all possible instances of unsafe + * pointers. For example, a pointer that was obtained from walking a + * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the + * fact that it may be NULL, invalid, etc. This is due to backwards + * compatibility requirements, as this was the behavior that was first + * introduced when kptrs were added. The behavior is now considered + * deprecated, and PTR_UNTRUSTED will eventually be removed. + * + * PTR_TRUSTED, on the other hand, is a pointer that the kernel + * guarantees to be valid and safe to pass to kfuncs and BPF helpers. + * For example, pointers passed to tracepoint arguments are considered + * PTR_TRUSTED, as are pointers that are passed to struct_ops + * callbacks. As alluded to above, pointers that are obtained from + * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a + * struct task_struct *task is PTR_TRUSTED, then accessing + * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored + * in a BPF register. Similarly, pointers passed to certain programs + * types such as kretprobes are not guaranteed to be valid, as they may + * for example contain an object that was recently freed. + */ + PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -636,6 +665,7 @@ enum bpf_return_type { RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. 
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 608dde740fef..545152ac136c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -680,4 +680,11 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) + +static inline bool bpf_type_has_unsafe_modifiers(u32 type) +{ + return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index d5b26380a60f..d38aa4251c28 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -19,36 +19,53 @@ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ -/* Trusted arguments are those which are meant to be referenced arguments with - * unchanged offset. It is used to enforce that pointers obtained from acquire - * kfuncs remain unmodified when being passed to helpers taking trusted args. +/* Trusted arguments are those which are guaranteed to be valid when passed to + * the kfunc. It is used to enforce that pointers obtained from either acquire + * kfuncs, or from the main kernel on a tracepoint or struct_ops callback + * invocation, remain unmodified when being passed to helpers taking trusted + * args. * - * Consider - * struct foo { - * int data; - * struct foo *next; - * }; + * Consider, for example, the following new task tracepoint: * - * struct bar { - * int data; - * struct foo f; - * }; + * SEC("tp_btf/task_newtask") + * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags) + * { + * ... + * } * - * struct foo *f = alloc_foo(); // Acquire kfunc - * struct bar *b = alloc_bar(); // Acquire kfunc + * And the following kfunc: * - * If a kfunc set_foo_data() wants to operate only on the allocated object, it - * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) * - * set_foo_data(f, 42); // Allowed - * set_foo_data(f->next, 42); // Rejected, non-referenced pointer - * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type - * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * All invocations to the kfunc must pass the unmodified, unwalked task: * - * In the final case, usually for the purposes of type matching, it is deduced - * by looking at the type of the member at the offset, but due to the - * requirement of trusted argument, this deduction will be strict and not done - * for this case. + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(task->last_wakee); // Rejected, walked task + * + * Programs may also pass referenced tasks directly to the kfunc: + * + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(task); // Allowed, same as above + * bpf_task_acquire(acquired); // Allowed + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task + * + * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or + * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these + * pointers are guaranteed to be safe. 
For example, the following BPF program + * would be rejected: + * + * SEC("kretprobe/free_task") + * int BPF_PROG(free_task_probe, struct task_struct *tsk) + * { + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(acquired); // Rejected, not a trusted pointer + * bpf_task_release(acquired); + * + * return 0; + * } */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ -- cgit From fd264ca020948a743e4c36731dfdecc4a812153c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 20 Nov 2022 11:54:32 -0800 Subject: bpf: Add a kfunc to type cast from bpf uapi ctx to kernel ctx Implement bpf_cast_to_kern_ctx() kfunc which does a type cast of a uapi ctx object to the corresponding kernel ctx. Previously if users want to access some data available in kctx but not in uapi ctx, bpf_probe_read_kernel() helper is needed. The introduction of bpf_cast_to_kern_ctx() allows direct memory access which makes code simpler and easier to understand. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221120195432.3113982-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index d38aa4251c28..9ed00077db6e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -487,6 +487,7 @@ const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); #else @@ -531,6 +532,10 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, { return NULL; } +static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, + enum bpf_prog_type prog_type) { + return -EINVAL; +} static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2) { -- cgit From 19d05ea712ecbbb67d302664da5ec58b37b9aece Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 21 Nov 2022 15:55:54 +0200 Subject: net: dsa: move tag_8021q headers to their proper place tag_8021q definitions are all over the place. Some are exported to linux/dsa/8021q.h (visible by DSA core, taggers, switch drivers and everyone else), and some are in dsa_priv.h. Move the structures that don't need external visibility into tag_8021q.c, and the ones which don't need the world or switch drivers to see them into tag_8021q.h. We also have the tag_8021q.h inclusion from switch.c, which is basically the entire reason why tag_8021q.c was built into DSA in commit 8b6e638b4be2 ("net: dsa: build tag_8021q.c as part of DSA core"). I still don't know how to better deal with that, so leave it alone. 
Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 3ed117e299ec..f3664ee12170 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -5,28 +5,8 @@ #ifndef _NET_DSA_8021Q_H #define _NET_DSA_8021Q_H -#include -#include #include - -struct dsa_switch; -struct dsa_port; -struct sk_buff; -struct net_device; -struct dsa_tag_8021q_vlan { - struct list_head list; - int port; - u16 vid; - refcount_t refcount; -}; -struct dsa_8021q_context { - struct dsa_switch *ds; - struct list_head vlans; - /* EtherType of RX VID, used for filtering on master interface */ - __be16 proto; -}; +#include int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); @@ -38,15 +18,6 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); -struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, - u16 tpid, u16 tci); - -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid); - -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, - int vbid); - u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); -- cgit From beb3d47d1d3d7185bb401af628ad32ee204a9526 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 23 Nov 2022 07:57:59 -0800 Subject: bpf: Fix a BTF_ID_LIST bug with CONFIG_DEBUG_INFO_BTF not set With CONFIG_DEBUG_INFO_BTF not set, we hit the following compilation error, /.../kernel/bpf/verifier.c:8196:23: error: array index 6 is past the end of the array (that has type 'u32[5]' (aka 'unsigned int[5]')) [-Werror,-Warray-bounds] if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) ^ ~~~~~~~~~~~~~~~~~~~~~~~ /.../kernel/bpf/verifier.c:8174:1: note: array 'special_kfunc_list' declared here BTF_ID_LIST(special_kfunc_list) ^ /.../include/linux/btf_ids.h:207:27: note: expanded from macro 'BTF_ID_LIST' #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; ^ /.../kernel/bpf/verifier.c:8443:19: error: array index 5 is past the end of the array (that has type 'u32[5]' (aka 'unsigned int[5]')) [-Werror,-Warray-bounds] btf_id == special_kfunc_list[KF_bpf_list_pop_back]; ^ ~~~~~~~~~~~~~~~~~~~~ /.../kernel/bpf/verifier.c:8174:1: note: array 'special_kfunc_list' declared here BTF_ID_LIST(special_kfunc_list) ^ /.../include/linux/btf_ids.h:207:27: note: expanded from macro 'BTF_ID_LIST' #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; ... Fix the problem by increasing the size of BTF_ID_LIST to 16 to avoid the compilation error and also prevent potentially unintended issues due to out-of-bounds access.
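The failure mode is easy to see from the two expansions of the macro: with CONFIG_DEBUG_INFO_BTF=y each BTF_ID() emits a real entry and the list grows accordingly, while without it BTF_ID() is a no-op and the list is just the fixed-size array. A sketch of the list in verifier.c at the time (entry order reconstructed from the indices in the error above):

    BTF_ID_LIST(special_kfunc_list)
    BTF_ID(func, bpf_obj_new_impl)     /* index 0 */
    BTF_ID(func, bpf_obj_drop_impl)    /* index 1 */
    BTF_ID(func, bpf_list_push_front)  /* index 2 */
    BTF_ID(func, bpf_list_push_back)   /* index 3 */
    BTF_ID(func, bpf_list_pop_front)   /* index 4 */
    BTF_ID(func, bpf_list_pop_back)    /* index 5, past the end of u32[5] */
    BTF_ID(func, bpf_cast_to_kern_ctx) /* index 6, ditto */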
Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-by: Nathan Chancellor Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221123155759.2669749-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index c9744efd202f..93397711a68c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -204,7 +204,7 @@ extern struct btf_id_set8 name; #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED -- cgit From 5a0f663f0189cf9e031e444f97c029717a99548d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 23 Nov 2022 21:32:06 -0800 Subject: compiler_types: Define __rcu as __attribute__((btf_type_tag("rcu"))) Currently, without rcu attribute info in BTF, the verifier treats an rcu-tagged pointer as a normal pointer. This might be a problem for sleepable programs, where rcu_read_lock()/unlock() is not available. For example, for a sleepable fentry program, if an rcu-protected memory access is interleaved with a sleepable helper/kfunc, it is possible the memory access after the sleepable helper/kfunc might be invalid since the object might have been freed by then. To prevent such cases, introducing rcu tagging for memory accesses in the verifier can help reject such programs. To enable rcu tagging in BTF, during kernel compilation, define __rcu as attribute btf_type_tag("rcu") so __rcu information can be preserved in DWARF and BTF, and can later be used for BPF program verification. Acked-by: KP Singh Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221124053206.2373141-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/compiler_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661..7c1afe0f4129 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # endif # define __iomem # define __percpu BTF_TYPE_TAG(percpu) -# define __rcu +# define __rcu BTF_TYPE_TAG(rcu) + # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 /* context/locking */ -- cgit From 01685c5bddaa6df3d662c8afed5e5289fcc68e5a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 23 Nov 2022 21:32:11 -0800 Subject: bpf: Introduce might_sleep field in bpf_func_proto Introduce bpf_func_proto->might_sleep to indicate that a particular helper might sleep. This will make it easier to later check whether a helper might sleep.
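A proto that would set the new field, as a hedged example (bpf_copy_from_user() is a known sleepable helper; the actual conversion of individual protos happens in later patches):

    const struct bpf_func_proto bpf_copy_from_user_proto = {
        .func        = bpf_copy_from_user,
        .gpl_only    = false,
        .might_sleep = true, /* copy_from_user() may fault and sleep */
        .ret_type    = RET_INTEGER,
        .arg1_type   = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type   = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type   = ARG_ANYTHING,
    };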
Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221124053211.2373553-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c9eafa67f2a2..43fd7eeeeabb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -682,6 +682,7 @@ struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; bool pkt_access; + bool might_sleep; enum bpf_return_type ret_type; union { struct { -- cgit From 9bb00b2895cbfe0ad410457b605d0a72524168c1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 23 Nov 2022 21:32:17 -0800 Subject: bpf: Add kfunc bpf_rcu_read_lock/unlock() Add two kfunc's bpf_rcu_read_lock() and bpf_rcu_read_unlock(). These two kfunc's can be used for all program types. The following is an example about how rcu pointer are used w.r.t. bpf_rcu_read_lock()/bpf_rcu_read_unlock(). struct task_struct { ... struct task_struct *last_wakee; struct task_struct __rcu *real_parent; ... }; Let us say prog does 'task = bpf_get_current_task_btf()' to get a 'task' pointer. The basic rules are: - 'real_parent = task->real_parent' should be inside bpf_rcu_read_lock region. This is to simulate rcu_dereference() operation. The 'real_parent' is marked as MEM_RCU only if (1). task->real_parent is inside bpf_rcu_read_lock region, and (2). task is a trusted ptr. So MEM_RCU marked ptr can be 'trusted' inside the bpf_rcu_read_lock region. - 'last_wakee = real_parent->last_wakee' should be inside bpf_rcu_read_lock region since it tries to access rcu protected memory. - the ptr 'last_wakee' will be marked as PTR_UNTRUSTED since in general it is not clear whether the object pointed by 'last_wakee' is valid or not even inside bpf_rcu_read_lock region. The verifier will reset all rcu pointer register states to untrusted at bpf_rcu_read_unlock() kfunc call site, so any such rcu pointer won't be trusted any more outside the bpf_rcu_read_lock() region. The current implementation does not support nested rcu read lock region in the prog. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221124053217.2373910-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/linux/bpf_verifier.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 43fd7eeeeabb..c6aa6912ea16 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -572,6 +572,9 @@ enum bpf_type_flag { */ PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), + /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. 
*/ + MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 545152ac136c..c05aa6e1f6f5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,6 +344,7 @@ struct bpf_verifier_state { u32 id; } active_lock; bool speculative; + bool active_rcu_lock; /* first and last insn idx of this verifier state */ u32 first_insn_idx; @@ -445,6 +446,7 @@ struct bpf_insn_aux_data { u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -534,6 +536,7 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool rcu_tag_supported; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; @@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } -#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED) static inline bool bpf_type_has_unsafe_modifiers(u32 type) { -- cgit From a66d79ee0bd5140a64b72cde588f8c83a55a1eb9 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Thu, 24 Nov 2022 11:18:14 +0800 Subject: net: ethernet: mtk_wed: add wcid overwritten support for wed v1 All wed versions should enable the wcid overwritten feature, since the wcid size is controlled by the wlan driver. Tested-by: Sujuan Chen Co-developed-by: Bo Jiao Signed-off-by: Bo Jiao Signed-off-by: Sujuan Chen Signed-off-by: David S. Miller --- include/linux/soc/mediatek/mtk_wed.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 8294978f4bca..1b1ef57609f7 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -85,6 +85,9 @@ struct mtk_wed_device { int irq; u8 version; + /* used by wlan driver */ + u32 rev_id; + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES]; struct mtk_wed_ring txfree_ring; -- cgit From 4c47867bc789bdc722f3bb760355c2c246fbe9af Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 24 Nov 2022 12:15:54 +0100 Subject: of: net: export of_get_mac_address_nvmem() Export of_get_mac_addr_nvmem() and rename it to of_get_mac_address_nvmem() in order to fit the convention followed by the existing exported helpers of the same kind. This way, OF compatible drivers using eg. fwnode_get_mac_address() can do a direct call to it instead of calling of_get_mac_address() just for the nvmem step, avoiding to repeat an expensive DT lookup which has already been done once. Eventually, fwnode_get_mac_address() should probably be updated to perform the nvmem lookup directly, but as of today, nvmem cells seem not to be supported by ACPI yet which would defeat this kind of extension. 
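A driver-side sketch of the intended call pattern (illustrative; the foo_ name is made up and argument types are simplified):

    static int foo_get_mac(struct device *dev, u8 *mac)
    {
        /* fwnode (ACPI/OF agnostic) lookup first... */
        if (!fwnode_get_mac_address(dev_fwnode(dev), mac))
            return 0;

        /* ...then only the nvmem step, without redoing the whole
         * DT lookup that of_get_mac_address() would perform
         */
        return of_get_mac_address_nvmem(dev->of_node, mac);
    }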
Suggested-by: Marcin Wojtas Signed-off-by: Miquel Raynal Signed-off-by: Paolo Abeni --- include/linux/of_net.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 0484b613ca64..d88715a0b3a5 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -14,6 +14,7 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); extern int of_get_mac_address(struct device_node *np, u8 *mac); +extern int of_get_mac_address_nvmem(struct device_node *np, u8 *mac); int of_get_ethdev_address(struct device_node *np, struct net_device *dev); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else @@ -28,6 +29,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac) return -ENODEV; } +static inline int of_get_mac_address_nvmem(struct device_node *np, u8 *mac) +{ + return -ENODEV; +} + static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev) { return -ENODEV; -- cgit From f78cd9c783e09a0fe454b0fc8b39c22025d7869e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Nov 2022 16:22:53 +0100 Subject: net: ethernet: mtk_wed: update mtk_wed_stop Update mtk_wed_stop routine and rename old mtk_wed_stop() to mtk_wed_deinit(). This is a preliminary patch to add Wireless Ethernet Dispatcher reset support. Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 1b1ef57609f7..c43510e541df 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -234,6 +234,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ (_dev)->ops->msg_update(_dev, _id, _msg, _len) +#define mtk_wed_device_stop(_dev) (_dev)->ops->stop(_dev) +#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -250,6 +252,8 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV +#define mtk_wed_device_stop(_dev) do {} while (0) +#define mtk_wed_device_dma_reset(_dev) do {} while (0) #endif #endif -- cgit From 23dca7a90017ff2512c501f7da4c7ca7a95c2d6e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 24 Nov 2022 16:22:55 +0100 Subject: net: ethernet: mtk_wed: add reset to tx_ring_setup callback Introduce reset parameter to mtk_wed_tx_ring_setup signature. This is a preliminary patch to add Wireless Ethernet Dispatcher reset support. 
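Call sites simply gain the extra flag, e.g. (sketch; the wlan-driver context and variable names are illustrative):

    /* normal bring-up: full ring initialization */
    mtk_wed_device_tx_ring_setup(wed, ring_idx, regs, false);

    /* reset path added by the following patches: re-program an
     * already allocated ring
     */
    mtk_wed_device_tx_ring_setup(wed, ring_idx, regs, true);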
Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- include/linux/soc/mediatek/mtk_wed.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index c43510e541df..beb190449704 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -158,7 +158,7 @@ struct mtk_wed_device { struct mtk_wed_ops { int (*attach)(struct mtk_wed_device *dev); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, - void __iomem *regs); + void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs); int (*txfree_ring_setup)(struct mtk_wed_device *dev, @@ -216,8 +216,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) #define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ - (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs, _reset) #define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ (_dev)->ops->txfree_ring_setup(_dev, _regs) #define mtk_wed_device_reg_read(_dev, _reg) \ @@ -243,7 +243,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) } #define mtk_wed_device_detach(_dev) do {} while (0) #define mtk_wed_device_start(_dev, _mask) do {} while (0) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV #define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV #define mtk_wed_device_reg_read(_dev, _reg) 0 #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) -- cgit From 12eb0f84a601cec8920b56d7ffd4182a6bfd6521 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Thu, 29 Sep 2022 13:45:08 +0200 Subject: net/mlx5: Remove unused ctx variables Remove mlx5_priv.ctx_list and ctx_lock which are no longer used after commit 601c10c89cbb ("net/mlx5: Delete custom device management logic"). Signed-off-by: Petr Pavlu Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 06cbad166225..d476255c9a3f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -606,8 +606,6 @@ struct mlx5_priv { struct list_head pgdir_list; /* end: alloc staff */ - struct list_head ctx_list; - spinlock_t ctx_lock; struct mlx5_adev **adev; int adev_idx; int sw_vhca_id; -- cgit From b146658f2ed90768b769222ca418617274242b32 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 31 Oct 2022 14:18:22 +0200 Subject: net/mlx5e: Add padding when needed in UMR WQEs Per the device spec, MTTs/KLMs list in a UMR WQE must be aligned to 64B. Per our SW design, the MTT/KLMs list would need alignment only if it's too small, for example on PPC when PAGE_SIZE is 64KB, and only 4 pages are needed to cover a MPWQE of size 256KB. Padding, if needed, is taken into account when calculating the UMR WQE fields (ds_cnt and xlt_octowords), however no entries are provided, instead garbage is passed. No real harm though, as these parts act as gaps between the RX MPWQEs and not used by any of them. 
Hence, in practice, device does not try to write any incoming packet to them. Still, prefer providing clean padding marking the end of the list, and do not map garbage into the RQ memory region. Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index eb3fac30488b..97275965f156 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -294,6 +294,7 @@ enum { #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit From 683d78a0d46235edfd3c50ddbc4af1065b422670 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 31 Oct 2022 14:30:23 +0200 Subject: net/mlx5: Remove unused UMR MTT definitions Defines MLX5_UMR_MTT_MASK and MLX5_UMR_MTT_MIN_CHUNK_SIZE are not in use. Remove them. Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 97275965f156..ecf840e2989b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -292,8 +292,6 @@ enum { #define MLX5_UMR_KLM_ALIGNMENT 4 #define MLX5_UMR_MTT_ALIGNMENT 0x40 -#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) -#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit From 02648b4b09d506bd4df2e17bf109c229fc728640 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 31 Oct 2022 14:44:57 +0200 Subject: net/mlx5: Generalize name of UMR alignment definition Per the device spec, MLX5_UMR_MTT_ALIGNMENT is good not only for UMR MTT entries, but for all other entries as well, like KLMs and KSMs. Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ecf840e2989b..a02f779f5c5b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -291,8 +291,8 @@ enum { }; #define MLX5_UMR_KLM_ALIGNMENT 4 -#define MLX5_UMR_MTT_ALIGNMENT 0x40 -#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) +#define MLX5_UMR_FLEX_ALIGNMENT 0x40 +#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit From daab2e9c54a52dea8dbd1af516e6f986eeed2556 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 31 Oct 2022 14:24:02 +0200 Subject: net/mlx5: Use generic definition for UMR KLM alignment MLX5_UMR_KLM_ALIGNMENT is in units of number of entries, while MLX5_UMR_MTT_ALIGNMENT (generalized and renamed to MLX5_UMR_FLEX_ALIGNMENT) is in byte units. This is misleading and confusing. Replace this KLM definition with one based on the generic definition. 
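With both definitions expressed in entries, call sites read uniformly, e.g. (sketch):

    /* round entry counts up to the 64B flex alignment */
    u16 mtt_entries = ALIGN(npages, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT);
    u16 klm_entries = ALIGN(nklms, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);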
Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a02f779f5c5b..5fe5d198b57a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,9 +290,9 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; -#define MLX5_UMR_KLM_ALIGNMENT 4 #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) +#define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit From e9374524950512a1769f610a868fcdf89ea59b8e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Tue, 22 Nov 2022 20:30:57 +0100 Subject: netfilter: ipset: Add support for new bitmask parameter Add a new parameter to complement the existing 'netmask' option. The main difference between netmask and bitmask is that bitmask takes any arbitrary ip address as input, it does not have to be a valid netmask. The name of the new parameter is 'bitmask'. This lets us mask out arbitrary bits in the ip address, for example: ipset create set1 hash:ip bitmask 255.128.255.0 ipset create set2 hash:ip,port family inet6 bitmask ffff::ff80 Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ada1296c87d5..ab934ad951a8 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -515,6 +515,16 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, *skbinfo = ext->skbinfo; } +static inline void +nf_inet_addr_mask_inplace(union nf_inet_addr *a1, + const union nf_inet_addr *mask) +{ + a1->all[0] &= mask->all[0]; + a1->all[1] &= mask->all[1]; + a1->all[2] &= mask->all[2]; + a1->all[3] &= mask->all[3]; +} + #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } -- cgit From c67cae551f0df80421b5703ee56ff5e2fe9c4de6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 25 Nov 2022 14:06:17 -0800 Subject: bpf: Tighten ptr_to_btf_id checks. The networking programs typically don't require CAP_PERFMON, but through kfuncs like bpf_cast_to_kern_ctx() they can access memory through PTR_TO_BTF_ID. In such case enforce CAP_PERFMON. Also make sure that only GPL programs can access kernel data structures. All kfuncs require GPL already. Also remove allow_ptr_to_map_access. It's the same as allow_ptr_leaks and different name for the same check only causes confusion. 
Fixes: fd264ca02094 ("bpf: Add a kfunc to type cast from bpf uapi ctx to kernel ctx") Fixes: 50c6b8a9aea2 ("selftests/bpf: Add a test for btf_type_tag "percpu"") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221125220617.26846-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 5 ----- include/linux/bpf_verifier.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 67452103bb86..4920ac252754 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1909,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void) return perfmon_capable(); } -static inline bool bpf_allow_ptr_to_map_access(void) -{ - return perfmon_capable(); -} - static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c05aa6e1f6f5..b5090e89cb3f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -531,7 +531,6 @@ struct bpf_verifier_env { bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; - bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; -- cgit From a351d6087bf7d3d8440d58d3bf244ec64b89394a Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 29 Nov 2022 18:40:39 +0800 Subject: bpf, sockmap: Fix missing BPF_F_INGRESS flag when using apply_bytes When redirecting, we use sk_msg_to_ingress() to get the BPF_F_INGRESS flag from the msg->flags. If apply_bytes is used and it is larger than the current data being processed, sk_psock_msg_verdict() will not be called when sendmsg() is called again. At this time, msg->flags is 0, and the BPF_F_INGRESS flag is lost. So we need to save the BPF_F_INGRESS flag in sk_psock and use it when redirecting. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Signed-off-by: Pengcheng Yang Signed-off-by: Daniel Borkmann Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1669718441-2654-3-git-send-email-yangpc@wangsu.com --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 70d6cb94e580..84f787416a54 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -82,6 +82,7 @@ struct sk_psock { u32 apply_bytes; u32 cork_bytes; u32 eval; + bool redir_ingress; /* undefined if sk_redir is null */ struct sk_msg *cork; struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) -- cgit From 3144bfa5078e0df7507a4de72061501e6a0e56be Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 29 Nov 2022 21:21:47 -0800 Subject: bpf: Fix a compilation failure with clang lto build When building the kernel with clang lto (CONFIG_LTO_CLANG_FULL=y), the following compilation error will appear: $ make LLVM=1 LLVM_IAS=1 -j ... ld.lld: error: ld-temp.o :26889:1: symbol 'cgroup_storage_map_btf_ids' is already defined cgroup_storage_map_btf_ids:; ^ make[1]: *** [/.../bpf-next/scripts/Makefile.vmlinux_o:61: vmlinux.o] Error 1 In local_storage.c, we have BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map) Commit c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs") added the above identical BTF_ID_LIST_SINGLE definition in bpf_cgrp_storage.c. With duplicated definitions, the llvm linker complains with lto build.
Also, the btf_id of 'struct bpf_local_storage_map' is extracted four times, for the sk, inode, task and cgrp local storages. Let us define a single global one with a different name than cgroup_storage_map_btf_ids, which also fixes the lto compilation error. Fixes: c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221130052147.1591625-1-yhs@fb.com --- include/linux/btf_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 93397711a68c..3a4f7cd882ca 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -266,5 +266,6 @@ MAX_BTF_TRACING_TYPE, extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; +extern u32 bpf_local_storage_map_btf_id[]; #endif -- cgit From bc66fa87d4fda9053a8145e5718fc278c2b88253 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Wed, 30 Nov 2022 10:12:16 +0800 Subject: net: phy: Add link between phy dev and mac dev If the external phy used by the current mac interface is managed by another mac interface, this network port cannot work independently. In particular, when the system suspends and resumes, the following trace may appear, so we should create a device link between the phy dev and the mac dev.

WARNING: CPU: 0 PID: 24 at drivers/net/phy/phy.c:983 phy_error+0x20/0x68
Modules linked in:
CPU: 0 PID: 24 Comm: kworker/0:2 Not tainted 6.1.0-rc3-00011-g5aaef24b5c6d-dirty #34
Hardware name: Freescale i.MX6 SoloX (Device Tree)
Workqueue: events_power_efficient phy_state_machine
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x68/0x90
 dump_stack_lvl from __warn+0xb4/0x24c
 __warn from warn_slowpath_fmt+0x5c/0xd8
 warn_slowpath_fmt from phy_error+0x20/0x68
 phy_error from phy_state_machine+0x22c/0x23c
 phy_state_machine from process_one_work+0x288/0x744
 process_one_work from worker_thread+0x3c/0x500
 worker_thread from kthread+0xf0/0x114
 kthread from ret_from_fork+0x14/0x28
Exception stack(0xf0951fb0 to 0xf0951ff8)

Signed-off-by: Xiaolei Wang Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221130021216.1052230-1-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9a3752c0c444..71eeb4e3b1fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -529,6 +529,8 @@ struct macsec_ops; * * @mdio: MDIO bus this PHY is on * @drv: Pointer to the driver for this PHY instance + * @devlink: Create a link between phy dev and mac dev, if the external phy + * used by current mac interface is managed by another mac interface. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing.
@@ -618,6 +620,8 @@ struct phy_device { /* And management functions */ struct phy_driver *drv; + struct device_link *devlink; + u32 phy_id; struct phy_c45_device_ids c45_ids; -- cgit From 278ab9793116a8531ffaa52b4e61644971131e35 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 30 Nov 2022 13:26:45 -0800 Subject: wifi: ieee80211: Do not open-code qos address offsets When building with -Wstringop-overflow, GCC's KASAN implementation does not correctly perform bounds checking within some complex structures when faced with literal offsets, and can get very confused. For example, this warning is seen due to literal offsets into struct ieee80211_hdr that may or may not be large enough:

drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function 'iwl_mvm_rx_mpdu_mq':
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:2022:29: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
 2022 | *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
In file included from drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h:32, from drivers/net/wireless/intel/iwlwifi/mvm/sta.h:15, from drivers/net/wireless/intel/iwlwifi/mvm/mvm.h:27, from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:10:
drivers/net/wireless/intel/iwlwifi/mvm/../fw/api/rx.h:559:16: note: at offset [78, 166] into destination object 'mpdu_len' of size 2
 559 | __le16 mpdu_len;
 | ^~~~~~~~

Refactor ieee80211_get_qos_ctl() to avoid using literal offsets, requiring the creation of the actual structure that is described in the comments. Explicitly choose the desired offset, making the code more human-readable too. This is one of the last remaining warnings to fix before enabling -Wstringop-overflow globally. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97490 Link: https://github.com/KSPP/linux/issues/181 Cc: Johannes Berg Cc: Kalle Valo Cc: Gregory Greenman Cc: "Gustavo A. R. Silva" Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221130212641.never.627-kees@kernel.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6252f02f38b7..80d6308dea06 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -338,6 +338,17 @@ struct ieee80211_qos_hdr { __le16 qos_ctrl; } __packed __aligned(2); +struct ieee80211_qos_hdr_4addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctrl; + u8 addr4[ETH_ALEN]; + __le16 qos_ctrl; +} __packed __aligned(2); + struct ieee80211_trigger { __le16 frame_control; __le16 duration; @@ -4060,16 +4071,21 @@ struct ieee80211_he_6ghz_capa { * @hdr: the frame * * The qos ctrl bytes come after the frame_control, duration, seq_num - * and 3 or 4 addresses of length ETH_ALEN. - * 3 addr: 2 + 2 + 2 + 3*6 = 24 - * 4 addr: 2 + 2 + 2 + 4*6 = 30 + * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose + * between struct ieee80211_qos_hdr_4addr and struct ieee80211_qos_hdr.
*/ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) { - if (ieee80211_has_a4(hdr->frame_control)) - return (u8 *)hdr + 30; + union { + struct ieee80211_qos_hdr addr3; + struct ieee80211_qos_hdr_4addr addr4; + } *qos; + + qos = (void *)hdr; + if (ieee80211_has_a4(qos->addr3.frame_control)) + return (u8 *)&qos->addr4.qos_ctrl; else - return (u8 *)hdr + 24; + return (u8 *)&qos->addr3.qos_ctrl; } /** -- cgit From eb8c507296f6038d46010396d91b42a05c3b64d9 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 23 Nov 2022 17:38:55 +0000 Subject: jump_label: Prevent key->enabled int overflow 1. With CONFIG_JUMP_LABEL=n static_key_slow_inc() doesn't have any protection against key->enabled refcounter overflow. 2. With CONFIG_JUMP_LABEL=y static_key_slow_inc_cpuslocked() still may turn the refcounter negative as (v + 1) may overflow. key->enabled is indeed a ref-counter as it's documented in multiple places: top comment in jump_label.h, Documentation/staging/static-keys.rst, etc. As -1 is reserved for a static key that's in the process of being enabled, functions would break with a negative key->enabled refcount: - for CONFIG_JUMP_LABEL=n negative return of static_key_count() breaks static_key_false(), static_key_true() - the ref counter may become 0 from the negative side by too many static_key_slow_inc() calls and lead to use-after-free issues. These flaws mean that some users have to introduce an additional mutex and prevent the reference counter from overflowing themselves, see bpf_enable_runtime_stats() checking the counter against INT_MAX / 2. Prevent the reference counter overflow by checking if (v + 1) > 0. Change the functions' API to return whether the increment was successful. Signed-off-by: Dmitry Safonov Acked-by: Jakub Kicinski Acked-by: Peter Zijlstra (Intel) Signed-off-by: Jakub Kicinski --- include/linux/jump_label.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 570831ca9951..4e968ebadce6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -224,9 +224,10 @@ extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); -extern void static_key_slow_inc(struct static_key *key); +extern bool static_key_slow_inc(struct static_key *key); +extern bool static_key_fast_inc_not_disabled(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); -extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern bool static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -278,11 +279,23 @@ static __always_inline bool static_key_true(struct static_key *key) return false; } -static inline void static_key_slow_inc(struct static_key *key) +static inline bool static_key_fast_inc_not_disabled(struct static_key *key) { + int v; + STATIC_KEY_CHECK_USE(key); - atomic_inc(&key->enabled); + /* + * Prevent key->enabled getting negative to follow the same semantics + * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment.
+ */ + v = atomic_read(&key->enabled); + do { + if (v < 0 || (v + 1) < 0) + return false; + } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); + return true; } +#define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key) static inline void static_key_slow_dec(struct static_key *key) { -- cgit From d93607082e982223cf92750f2d9039ff365b9d24 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 30 Nov 2022 23:28:26 +0100 Subject: net: add netdev_sw_irq_coalesce_default_on() Add a helper for drivers wanting to set SW IRQ coalescing by default. The related sysfs attributes can be used to override the default values. Follow Jakub's suggestion and put this functionality into net core so that drivers wanting to use software interrupt coalescing per default don't have to open-code it. Note that this function needs to be called before the netdevice is registered. Suggested-by: Jakub Kicinski Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5aa35c58c342..f78db610ada5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -78,6 +78,7 @@ struct xdp_buff; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); +void netdev_sw_irq_coalesce_default_on(struct net_device *dev); /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -- cgit From fca1aa75518c03b04c3c249e9a9134faf9ca18c5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 3 Dec 2022 10:46:02 -0800 Subject: bpf: Handle MEM_RCU type properly Commit 9bb00b2895cb ("bpf: Add kfunc bpf_rcu_read_lock/unlock()") introduced MEM_RCU and bpf_rcu_read_lock/unlock() support. In that commit, an rcu pointer is tagged with both MEM_RCU and PTR_TRUSTED so that it can be passed into kfuncs or helpers as an argument. Martin raised a good question in [1]: the rcu pointer, while still allowing access to the object, might have a reference count of 0. This might cause a problem if the rcu pointer is passed to a kfunc which expects trusted arguments where the ref count should be greater than 0. This patch makes the following changes related to the MEM_RCU pointer:

- MEM_RCU pointer might be NULL (PTR_MAYBE_NULL).
- Introduce KF_RCU so a MEM_RCU ptr can be acquired with a KF_RCU tagged kfunc, which assumes the ref count of the rcu ptr could be zero.
- For mem access 'b = ptr->a', say 'ptr' is a MEM_RCU ptr, and 'a' is tagged with __rcu as well. Let us mark 'b' as MEM_RCU | PTR_MAYBE_NULL.
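A sketch of how a kfunc would opt in to such rcu pointers under this scheme, using the KF_RCU flag added in the diff below (the kfunc name is hypothetical):

	/* Sketch only: my_task_kfunc is illustrative, not a real kfunc. */
	BTF_ID_FLAGS(func, my_task_kfunc, KF_RCU | KF_RET_NULL)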
[1] https://lore.kernel.org/bpf/ac70f574-4023-664e-b711-e0d3b18117fd@linux.dev/ Fixes: 9bb00b2895cb ("bpf: Add kfunc bpf_rcu_read_lock/unlock()") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221203184602.477272-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- include/linux/btf.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b5090e89cb3f..c07b351a5bc7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -682,7 +682,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } -#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED) +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) static inline bool bpf_type_has_unsafe_modifiers(u32 type) { diff --git a/include/linux/btf.h b/include/linux/btf.h index 9ed00077db6e..cbd6e4096f8c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -70,6 +70,7 @@ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ +#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ /* * Return the name of the passed struct, if exists, or halt the build if for -- cgit From c0c852dd1876dc1db4600ce951a92aadd3073b1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 3 Dec 2022 12:49:54 -0800 Subject: bpf: Do not mark certain LSM hook arguments as trusted Martin mentioned that the verifier cannot assume that arguments from the LSM hook sk_alloc_security are trusted, since after the hook is called, the sk ref_count is set to 1. This will overwrite the ref_count changed by the bpf program and may cause ref_count underflow later on. I then further checked some other hooks. For example, for the bpf_lsm_file_alloc() hook in fs/file_table.c,

	f->f_cred = get_cred(cred);
	error = security_file_alloc(f);
	if (unlikely(error)) {
		file_free_rcu(&f->f_rcuhead);
		return ERR_PTR(error);
	}
	atomic_long_set(&f->f_count, 1);

The input parameter 'f' to security_file_alloc() cannot be trusted as well. Specifically, I investigated bpf_map/bpf_prog/file/sk/task alloc/free lsm hooks. Except for bpf_map_alloc and task_alloc, arguments for all other hooks should not be considered trusted. This may not be a complete list, but it covers common usage for sk and task.
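One plausible shape for the bpf_lsm_is_trusted() helper declared in the diff below (a sketch; the actual hook list lives in kernel/bpf/bpf_lsm.c and may differ):

	/* Sketch only: deny trust for hooks whose arguments are not yet
	 * fully initialized when the hook runs.
	 */
	BTF_SET_START(untrusted_lsm_hooks)
	BTF_ID(func, bpf_lsm_sk_alloc_security)
	BTF_ID(func, bpf_lsm_file_alloc_security)
	BTF_SET_END(untrusted_lsm_hooks)

	bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
	{
		return !btf_id_set_contains(&untrusted_lsm_hooks,
					    prog->aux->attach_btf_id);
	}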
Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221203204954.2043348-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_lsm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 4bcf76a9bb06..1de7ece5d36d 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); bool bpf_lsm_is_sleepable_hook(u32 btf_id); +bool bpf_lsm_is_trusted(const struct bpf_prog *prog); static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) @@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) return false; } +static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { -- cgit From 919e43fad5163a8ceb39826ecdee897a9f799351 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 2 Dec 2022 20:41:29 +0200 Subject: xfrm: add an interface to offload policy Extend the netlink interface to add and delete XFRM policy from the device. This functionality is a first step toward implementing a packet IPsec offload solution. Signed-off-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5aa35c58c342..4096e3fe8e4a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1040,6 +1040,9 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); + int (*xdo_dev_policy_add) (struct xfrm_policy *x); + void (*xdo_dev_policy_delete) (struct xfrm_policy *x); + void (*xdo_dev_policy_free) (struct xfrm_policy *x); }; #endif -- cgit From f3da86dc2c8c9004445cfbb15ac086773622d853 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 2 Dec 2022 20:41:33 +0200 Subject: xfrm: add support to HW update soft and hard limits Both in RX and TX, the traffic that performs the IPsec packet offload transformation is accounted for by the HW. This is needed to properly handle hard limits that require dropping the packet. It means that the XFRM core needs to update its internal counters with the ones accounted by the HW, so new callbacks are introduced in this patch. In case a soft or hard limit occurs, the driver should call xfrm_state_check_expire(), which will perform key rekeying exactly as done by the XFRM core.
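For illustration, a driver-side sketch of the new lifetime callback; everything except the xfrm names shown in this series is hypothetical:

	/* Sketch only: push HW-accounted traffic counters into the XFRM
	 * state; on a soft/hard limit event the driver would then call
	 * xfrm_state_check_expire() as described above.
	 */
	static void my_xdo_dev_state_update_curlft(struct xfrm_state *x)
	{
		u64 packets, bytes;

		my_hw_read_sa_counters(x, &packets, &bytes); /* hypothetical */
		x->curlft.packets += packets;
		x->curlft.bytes += bytes;
	}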
Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4096e3fe8e4a..29ae964e3b89 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1040,6 +1040,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); + void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); -- cgit From a46e9010124256f5bf5fc2c241a45cf1944b768e Mon Sep 17 00:00:00 2001 From: Revanth Kumar Uppala Date: Thu, 1 Dec 2022 15:58:43 +0000 Subject: net: stmmac: Power up SERDES after the PHY link The Tegra MGBE ethernet controller requires that the SERDES link is powered-up after the PHY link is up, otherwise the link fails to become ready following a resume from suspend. Add a variable to indicate that the SERDES link must be powered-up after the PHY link. Signed-off-by: Revanth Kumar Uppala Signed-off-by: Jon Hunter Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fb2e88614f5d..83ca2e8eb6b5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -271,5 +271,6 @@ struct plat_stmmacenet_data { int msi_tx_base_vec; bool use_phy_wol; bool sph_disable; + bool serdes_up_after_phy_linkup; }; #endif -- cgit From 3afee4ed336ed7a6cff78d23339879ffb654e02e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 2 Dec 2022 22:10:23 +0200 Subject: net/mlx5: Add HW definitions for IPsec packet offload Add all needed bits to support IPsec packet offload mode. 
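As a rough illustration, the new ASO layout in the diff below is driven with the usual mlx5 MLX5_SET accessors; the field choice here is arbitrary:

	/* Sketch only: arm replay protection and HW flow removal in an
	 * ipsec_aso context (fields per the mlx5_ifc diff below).
	 */
	MLX5_SET(ipsec_aso, aso_ctx, valid, 1);
	MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION);
	MLX5_SET(ipsec_aso, aso_ctx, remove_flow_enable, 1);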
Reviewed-by: Raed Salem Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/linux/mlx5/mlx5_ifc.h | 53 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..300b56ea5ff4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -445,7 +445,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; - u8 reserved_at_40[0x6]; + u8 reformat_add_esp_trasport[0x1]; + u8 reserved_at_41[0x2]; + u8 reformat_del_esp_trasport[0x1]; + u8 reserved_at_44[0x2]; u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; @@ -638,8 +641,10 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 reserved_at_1a0[0x8]; u8 macsec_syndrome[0x8]; + u8 ipsec_syndrome[0x8]; + u8 reserved_at_1b8[0x8]; - u8 reserved_at_1b0[0x50]; + u8 reserved_at_1c0[0x40]; }; struct mlx5_ifc_fte_match_set_misc3_bits { @@ -6384,6 +6389,9 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5, + MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -11563,6 +11571,41 @@ enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, }; +enum { + MLX5_IPSEC_ASO_REG_C_0_1 = 0x0, + MLX5_IPSEC_ASO_REG_C_2_3 = 0x1, + MLX5_IPSEC_ASO_REG_C_4_5 = 0x2, + MLX5_IPSEC_ASO_REG_C_6_7 = 0x3, +}; + +enum { + MLX5_IPSEC_ASO_MODE = 0x0, + MLX5_IPSEC_ASO_REPLAY_PROTECTION = 0x1, + MLX5_IPSEC_ASO_INC_SN = 0x2, +}; + +struct mlx5_ifc_ipsec_aso_bits { + u8 valid[0x1]; + u8 reserved_at_201[0x1]; + u8 mode[0x2]; + u8 window_sz[0x2]; + u8 soft_lft_arm[0x1]; + u8 hard_lft_arm[0x1]; + u8 remove_flow_enable[0x1]; + u8 esn_event_arm[0x1]; + u8 reserved_at_20a[0x16]; + + u8 remove_flow_pkt_cnt[0x20]; + + u8 remove_flow_soft_lft[0x20]; + + u8 reserved_at_260[0x80]; + + u8 mode_parameter[0x20]; + + u8 replay_protection_window[0x100]; +}; + struct mlx5_ifc_ipsec_obj_bits { u8 modify_field_select[0x40]; u8 full_offload[0x1]; @@ -11584,7 +11627,11 @@ struct mlx5_ifc_ipsec_obj_bits { u8 implicit_iv[0x40]; - u8 reserved_at_100[0x700]; + u8 reserved_at_100[0x8]; + u8 ipsec_aso_access_pd[0x18]; + u8 reserved_at_120[0xe0]; + + struct mlx5_ifc_ipsec_aso_bits ipsec_aso; }; struct mlx5_ifc_create_ipsec_obj_in_bits { -- cgit From bffdeaa8a5af7200b0e74c9d5a41167f86626a36 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 6 Dec 2022 15:33:43 -0800 Subject: bpf: decouple prune and jump points BPF verifier marks some instructions as prune points. Currently these prune points serve two purposes. It's a point where verifier tries to find previously verified state and check current state's equivalence to short circuit verification for current code path. But also currently it's a point where jump history, used for precision backtracking, is updated. This is done so that non-linear flow of execution could be properly backtracked. Such coupling is coincidental and unnecessary. Some prune points are not part of some non-linear jump path, so don't need update of jump history. On the other hand, not all instructions which have to be recorded in jump history necessarily are good prune points. 
This patch splits prune and jump points into independent flags. Currently all prune points are marked as jump points to minimize amount of changes in this patch, but next patch will perform some optimization of prune vs jmp point placement. No functional changes are intended. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221206233345.438540-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c07b351a5bc7..70d06a99f0b8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -452,6 +452,7 @@ struct bpf_insn_aux_data { /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ bool prune_point; + bool jmp_point; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ -- cgit From ed883bec679b027b198d57a336715f8298fb88b4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 5 Dec 2022 12:34:42 +0100 Subject: net: ethernet: mtk_wed: add reset to rx_ring_setup callback This patch adds reset parameter to mtk_wed_rx_ring_setup signature in order to align rx_ring_setup callback to tx_ring_setup one introduced in 'commit 23dca7a90017 ("net: ethernet: mtk_wed: add reset to tx_ring_setup callback")' Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/29c6e7a5469e784406cf3e2920351d1207713d05.1670239984.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/mediatek/mtk_wed.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index beb190449704..a0746d4aec20 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -160,7 +160,7 @@ struct mtk_wed_ops { int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, - void __iomem *regs); + void __iomem *regs, bool reset); int (*txfree_ring_setup)(struct mtk_wed_device *dev, void __iomem *regs); int (*msg_update)(struct mtk_wed_device *dev, int cmd_id, @@ -228,8 +228,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) (_dev)->ops->irq_get(_dev, _mask) #define mtk_wed_device_irq_set_mask(_dev, _mask) \ (_dev)->ops->irq_set_mask(_dev, _mask) -#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \ - (_dev)->ops->rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->rx_ring_setup(_dev, _ring, _regs, _reset) #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \ (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ @@ -249,7 +249,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) #define mtk_wed_device_irq_get(_dev, _mask) 0 #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) -#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV 
#define mtk_wed_device_stop(_dev) do {} while (0) -- cgit From 5b481acab4ce017fda8166fa9428511da41109e5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 6 Dec 2022 15:59:32 +0100 Subject: bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret The current way of expressing that a non-bpf kernel component is willing to accept that bpf programs can be attached to it and that they can change the return value is to abuse ALLOW_ERROR_INJECTION. This is debated in the link below, and the result is that it is not a reasonable thing to do. Reuse the kfunc declaration structure to also tag the kernel functions we want to be fmodret. This way we can control from any subsystem which functions are being modified by bpf without touching the verifier. Link: https://lore.kernel.org/all/20221121104403.1545f9b5@gandalf.local.home/ Suggested-by: Alexei Starovoitov Signed-off-by: Benjamin Tissoires Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20221206145936.922196-2-benjamin.tissoires@redhat.com --- include/linux/btf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9aababc5d78..6a0808e7845c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -412,8 +412,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, u32 kfunc_btf_id); +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); -- cgit From df268f6ca7da1088d7ecff4250d1478d3fa2406b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 6 Dec 2022 20:51:12 +0200 Subject: net/mlx5: Introduce IFC bits for migratable Introduce IFC related capabilities to enable setting VF to be able to perform live migration. e.g.: to be migratable. Signed-off-by: Yishai Hadas Reviewed-by: Mark Bloch Acked-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..2093131483c7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -68,6 +68,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, }; @@ -1875,7 +1876,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { }; struct mlx5_ifc_cmd_hca_cap_2_bits { - u8 reserved_at_0[0xa0]; + u8 reserved_at_0[0x80]; + + u8 migratable[0x1]; + u8 reserved_at_81[0x1f]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; -- cgit From 47d0c500d76c7b1d220c47cd875763fef04eba2e Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 20:51:16 +0200 Subject: net/mlx5: Add generic getters for other functions caps Downstream patch requires to get other function GENERAL2 caps while mlx5_vport_get_other_func_cap() gets only one type of caps (general). Rename it to represent this and introduce a generic implementation of mlx5_vport_get_other_func_cap(). 
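For illustration, a caller of the generalized getter might look like this (buffer sizing follows the usual query_hca_cap convention; the opmod choice is a sketch, not verbatim driver code):

	/* Sketch only: query another function's GENERAL2 caps. */
	void *out = kzalloc(MLX5_ST_SZ_BYTES(query_hca_cap_out), GFP_KERNEL);

	if (!out)
		return -ENOMEM;
	err = mlx5_vport_get_other_func_cap(dev, function_id, out,
			MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);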
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Acked-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/vport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aad53cb72f17..7f31432f44c2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, + u16 opmod); #endif /* __MLX5_VPORT_H__ */ -- cgit From f1543c7abab25d93bc8e9fae79b4cb3153ed6669 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 31 May 2022 13:55:28 +0300 Subject: net/mlx5: mlx5_ifc updates for MATCH_DEFINER general object Update full structure of match definer and add an ID of the SELECT match definer type. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 68 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2093131483c7..294cfe175c4b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6108,6 +6108,38 @@ struct mlx5_ifc_match_definer_format_32_bits { u8 inner_dmac_15_0[0x10]; }; +enum { + MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61, +}; + +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0 +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48 +#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9 +#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8 + +struct mlx5_ifc_match_definer_match_mask_bits { + u8 reserved_at_1c0[5][0x20]; + u8 match_dw_8[0x20]; + u8 match_dw_7[0x20]; + u8 match_dw_6[0x20]; + u8 match_dw_5[0x20]; + u8 match_dw_4[0x20]; + u8 match_dw_3[0x20]; + u8 match_dw_2[0x20]; + u8 match_dw_1[0x20]; + u8 match_dw_0[0x20]; + + u8 match_byte_7[0x8]; + u8 match_byte_6[0x8]; + u8 match_byte_5[0x8]; + u8 match_byte_4[0x8]; + + u8 match_byte_3[0x8]; + u8 match_byte_2[0x8]; + u8 match_byte_1[0x8]; + u8 match_byte_0[0x8]; +}; + struct mlx5_ifc_match_definer_bits { u8 modify_field_select[0x40]; @@ -6116,9 +6148,41 @@ struct mlx5_ifc_match_definer_bits { u8 reserved_at_80[0x10]; u8 format_id[0x10]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x60]; - u8 match_mask[16][0x20]; + u8 format_select_dw3[0x8]; + u8 format_select_dw2[0x8]; + u8 format_select_dw1[0x8]; + u8 format_select_dw0[0x8]; + + u8 format_select_dw7[0x8]; + u8 format_select_dw6[0x8]; + u8 format_select_dw5[0x8]; + u8 format_select_dw4[0x8]; + + u8 reserved_at_100[0x18]; + u8 format_select_dw8[0x8]; + + u8 reserved_at_120[0x20]; + + u8 format_select_byte3[0x8]; + u8 format_select_byte2[0x8]; + u8 format_select_byte1[0x8]; + u8 format_select_byte0[0x8]; + + u8 format_select_byte7[0x8]; + u8 format_select_byte6[0x8]; + u8 format_select_byte5[0x8]; + u8 format_select_byte4[0x8]; + + u8 reserved_at_180[0x40]; + + union { + struct { + u8 match_mask[16][0x20]; + }; + struct mlx5_ifc_match_definer_match_mask_bits match_mask_format; + }; }; struct mlx5_ifc_general_obj_in_cmd_hdr_bits { -- cgit From 38bf24c38d19dc4ba5ec684ac2afa2f8e256db1e Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 15 Aug 2022 12:45:28 +0300 Subject: net/mlx5: fs, add match on ranges 
API Range is a new flow destination type which allows matching on a range of values instead of matching on a specific value. Range flow destination has the following fields: - hit_ft: flow table to forward the traffic in case of hit - miss_ft: flow table to forward the traffic in case of miss - field: which packet characteristic to match on - min: minimal value for the selected field - max: maximal value for the selected field Note: - In order to match, the value in the packet should meet the following criteria: min <= value < max - Currently, the only supported field type is L2 packet length Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c7a91981cd5a..ba6958b49a8e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -50,6 +50,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_PORT, MLX5_FLOW_DESTINATION_TYPE_COUNTER, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, + MLX5_FLOW_DESTINATION_TYPE_RANGE, }; enum { @@ -143,6 +144,10 @@ enum { MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1), }; +enum mlx5_flow_dest_range_field { + MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN = 0, +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -156,6 +161,13 @@ struct mlx5_flow_destination { struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; + struct { + struct mlx5_flow_table *hit_ft; + struct mlx5_flow_table *miss_ft; + enum mlx5_flow_dest_range_field field; + u32 min; + u32 max; + } range; u32 sampler_id; }; }; -- cgit From 6b75bd3d036745b9be30917909f03602099adbdb Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:35 +0530 Subject: bpf: Refactor ARG_PTR_TO_DYNPTR checks into process_dynptr_func ARG_PTR_TO_DYNPTR is akin to ARG_PTR_TO_TIMER, ARG_PTR_TO_KPTR, where the underlying register type is subjected to more special checks to determine the type of object represented by the pointer and its state consistency. Move dynptr checks to their own 'process_dynptr_func' function so that is consistent and in-line with existing code. This also makes it easier to reuse this code for kfunc handling. Then, reuse this consolidated function in kfunc dynptr handling too. Note that for kfuncs, the arg_type constraint of DYNPTR_TYPE_LOCAL has been lifted. 
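A condensed sketch of the consolidated call path, using the signature declared in the diff above (illustrative, not verbatim verifier code):

	/* Sketch only: helper-argument checking funnels dynptr args into
	 * one place, which kfunc handling can then reuse.
	 */
	case ARG_PTR_TO_DYNPTR:
		err = process_dynptr_func(env, regno, arg_type, meta);
		if (err)
			return err;
		break;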
Acked-by: David Vernet Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 70d06a99f0b8..df0cb825e0e3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -615,11 +615,9 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, enum bpf_arg_type arg_type); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, - struct bpf_reg_state *reg); -bool is_dynptr_type_expected(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - enum bpf_arg_type arg_type); +struct bpf_call_arg_meta; +int process_dynptr_func(struct bpf_verifier_env *env, int regno, + enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, -- cgit From 270605317366e4535d8d9fc3d9da1ad0fb3c9d45 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:37 +0530 Subject: bpf: Rework process_dynptr_func Recently, user ringbuf support introduced a PTR_TO_DYNPTR register type for use in callback state, because in case of user ringbuf helpers, there is no dynptr on the stack that is passed into the callback. To reflect such a state, a special register type was created. However, some checks have been bypassed incorrectly during the addition of this feature. First, for arg_type with MEM_UNINIT flag which initialize a dynptr, they must be rejected for such register type. Secondly, in the future, there are plans to add dynptr helpers that operate on the dynptr itself and may change its offset and other properties. In all of these cases, PTR_TO_DYNPTR shouldn't be allowed to be passed to such helpers, however the current code simply returns 0. The rejection for helpers that release the dynptr is already handled. For fixing this, we take a step back and rework existing code in a way that will allow fitting in all classes of helpers and have a coherent model for dealing with the variety of use cases in which dynptr is used. First, for ARG_PTR_TO_DYNPTR, it can either be set alone or together with a DYNPTR_TYPE_* constant that denotes the only type it accepts. Next, helpers which initialize a dynptr use MEM_UNINIT to indicate this fact. To make the distinction clear, use MEM_RDONLY flag to indicate that the helper only operates on the memory pointed to by the dynptr, not the dynptr itself. In C parlance, it would be equivalent to taking the dynptr as a point to const argument. When either of these flags are not present, the helper is allowed to mutate both the dynptr itself and also the memory it points to. Currently, the read only status of the memory is not tracked in the dynptr, but it would be trivial to add this support inside dynptr state of the register. With these changes and renaming PTR_TO_DYNPTR to CONST_PTR_TO_DYNPTR to better reflect its usage, it can no longer be passed to helpers that initialize a dynptr, i.e. bpf_dynptr_from_mem, bpf_ringbuf_reserve_dynptr. 
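To make the flag semantics concrete, a sketch of how helper prototypes can encode them (arg layout illustrative):

	/* Sketch only: helper initializes a local dynptr. */
	.arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
	/* Sketch only: dynptr taken as const; the helper may use the
	 * memory it points to but cannot mutate the dynptr itself.
	 */
	.arg2_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,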
A note to reviewers is that in code that does mark_stack_slots_dynptr, and unmark_stack_slots_dynptr, we implicitly rely on the fact that PTR_TO_STACK reg is the only case that can reach that code path, as one cannot pass CONST_PTR_TO_DYNPTR to helpers that don't set MEM_RDONLY. In both cases such helpers won't be setting that flag. The next patch will add a couple of selftest cases to make sure this doesn't break. Fixes: 205715673844 ("bpf: Add bpf_user_ringbuf_drain() helper") Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4920ac252754..3de24cfb7a3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -775,7 +775,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ - PTR_TO_DYNPTR, /* reg points to a dynptr */ + CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ @@ -2828,7 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr); +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); -- cgit From e47877c7aa821c413b45e05f804819579bdfa1a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Dec 2022 11:36:32 -1000 Subject: rhashtable: Allow rhashtable to be used from irq-safe contexts rhashtable currently only does bh-safe synchronization making it impossible to use from irq-safe contexts. Switch it to use irq-safe synchronization to remove the restriction. v2: Update the lock functions to return the ulong flags value and unlock functions to take the value directly instead of passing around the pointer. Suggested by Linus. Signed-off-by: Tejun Heo Reviewed-by: David Vernet Acked-by: Josh Don Acked-by: Hao Luo Acked-by: Barret Rhoden Cc: Linus Torvalds Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 61 +++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 68dab3e08aad..5b5357c0bd8c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -323,29 +323,36 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert( * When we write to a bucket without unlocking, we use rht_assign_locked(). 
*/ -static inline void rht_lock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) +static inline unsigned long rht_lock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); + return flags; } -static inline void rht_lock_nested(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bucket, - unsigned int subclass) +static inline unsigned long rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bucket, + unsigned int subclass) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); + return flags; } static inline void rht_unlock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) + struct rhash_lock_head __rcu **bkt, + unsigned long flags) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); - local_bh_enable(); + local_irq_restore(flags); } static inline struct rhash_head *__rht_ptr( @@ -393,7 +400,8 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, - struct rhash_head *obj) + struct rhash_head *obj, + unsigned long flags) { if (rht_is_a_nulls(obj)) obj = NULL; @@ -401,7 +409,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); - local_bh_enable(); + local_irq_restore(flags); } /** @@ -706,6 +714,7 @@ static inline void *__rhashtable_insert_fast( struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; + unsigned long flags; unsigned int hash; int elasticity; void *data; @@ -720,11 +729,11 @@ static inline void *__rhashtable_insert_fast( if (!bkt) goto out; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } @@ -756,9 +765,9 @@ slow_path: RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); data = NULL; goto out; } @@ -785,7 +794,7 @@ slow_path: } atomic_inc(&ht->nelems); - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); @@ -797,7 +806,7 @@ out: return data; out_unlock: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); goto out; } @@ -991,6 +1000,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -999,7 +1009,7 @@ static inline int __rhashtable_remove_fast_one( if (!bkt) return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; @@ -1043,14 +1053,14 @@ static inline int __rhashtable_remove_fast_one( if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, 
bkt, obj, flags); } goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: if (err > 0) { atomic_dec(&ht->nelems); @@ -1143,6 +1153,7 @@ static inline int __rhashtable_replace_fast( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -1158,7 +1169,7 @@ static inline int __rhashtable_replace_fast( return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { @@ -1169,15 +1180,15 @@ static inline int __rhashtable_replace_fast( rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj_new); + rht_assign_unlock(tbl, bkt, obj_new, flags); } err = 0; goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: return err; -- cgit From ce098da1497c6dee9589fce2c61d1910f4fcf0e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Dec 2022 22:02:59 -0800 Subject: skbuff: Introduce slab_build_skb() syzkaller reported: BUG: KASAN: slab-out-of-bounds in __build_skb_around+0x235/0x340 net/core/skbuff.c:294 Write of size 32 at addr ffff88802aa172c0 by task syz-executor413/5295 For bpf_prog_test_run_skb(), which uses a kmalloc()ed buffer passed to build_skb(). When build_skb() is passed a frag_size of 0, it means the buffer came from kmalloc. In these cases, ksize() is used to find its actual size, but since the allocation may not have been made to that size, actually perform the krealloc() call so that all the associated buffer size checking will be correctly notified (and use the "new" pointer so that compiler hinting works correctly). Split this logic out into a new interface, slab_build_skb(), but leave the original 0 checking for now to catch any stragglers. Reported-by: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Link: https://groups.google.com/g/syzkaller-bugs/c/UnIKxTtU5-0/m/-wbXinkgAQAJ Fixes: 38931d8989b5 ("mm: Make ksize() a reporting-only function") Cc: Pavel Begunkov Cc: pepsipu Cc: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Cc: Vlastimil Babka Cc: kasan-dev Cc: Andrii Nakryiko Cc: ast@kernel.org Cc: Daniel Borkmann Cc: Hao Luo Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: jolsa@kernel.org Cc: KP Singh Cc: martin.lau@linux.dev Cc: Stanislav Fomichev Cc: song@kernel.org Cc: Yonghong Song Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221208060256.give.994-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4e464a27adaf..4c8492401a10 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1255,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); +struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer -- cgit From 5dd9cdbc9dec3e99b19e483767e247d15ca8cc0d Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 9 Dec 2022 15:57:29 +0200 Subject: bpf: states_equal() must build idmap for all function frames verifier.c:states_equal() must maintain register ID mapping across all function frames. 
Otherwise the following example might be erroneously marked as safe:

main:
  fp[-24] = map_lookup_elem(...) ; frame[0].fp[-24].id == 1
  fp[-32] = map_lookup_elem(...) ; frame[0].fp[-32].id == 2
  r1 = &fp[-24]
  r2 = &fp[-32]
  call foo()
  r0 = 0
  exit

foo:
 0: r9 = r1
 1: r8 = r2
 2: r7 = ktime_get_ns()
 3: r6 = ktime_get_ns()
 4: if (r6 > r7) goto skip_assign
 5: r9 = r8
skip_assign: ; <--- checkpoint
 6: r9 = *r9 ; (a) frame[1].r9.id == 2
             ; (b) frame[1].r9.id == 1
 7: if r9 == 0 goto exit: ; mark_ptr_or_null_regs() transfers != 0 info
                          ; for all regs sharing ID:
                          ; (a) r9 != 0 => &frame[0].fp[-32] != 0
                          ; (b) r9 != 0 => &frame[0].fp[-24] != 0
 8: r8 = *r8 ; (a) r8 == &frame[0].fp[-32]
             ; (b) r8 == &frame[0].fp[-32]
 9: r0 = *r8 ; (a) safe
             ; (b) unsafe
exit:
10: exit

While processing the call to foo() the verifier considers the following execution paths: (a) 0-10 (b) 0-4,6-10 (There is also path 0-7,10 but it is not interesting for the issue at hand; (a) is verified first.) Suppose that a checkpoint is created at (6) when path (a) is verified, and next path (b) is verified and (6) is reached. If states_equal() maintains a separate 'idmap' for each frame, the mapping at (6) for frame[1] would be empty and regsafe(r9)::check_ids() would add a pair 2->1 and return true, which is an error. If states_equal() maintains a single 'idmap' for all frames, the mapping at (6) would be { 1->1, 2->2 } and regsafe(r9)::check_ids() would return false when trying to add a pair 2->1. This issue was suggested in the following discussion: https://lore.kernel.org/bpf/CAEf4BzbFB5g4oUfyxk9rHy-PJSLQ3h8q9mV=rVoXfr_JVm8+1Q@mail.gmail.com/ Suggested-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20221209135733.28851-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index df0cb825e0e3..53d175cbaa02 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -273,9 +273,9 @@ struct bpf_id_pair { u32 cur; }; -/* Maximum number of register states that can exist at once */ -#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) #define MAX_CALL_FRAMES 8 +/* Maximum number of register states that can exist at once */ +#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; -- cgit From 860b7f27b8f78564ca5a2f607e0820b2d352a562 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Wed, 7 Dec 2022 13:35:57 +0200 Subject: linux/virtio_net.h: Support USO offload in vnet header. Now, it's possible to convert USO vnet packets from/to skb. Added support for GSO_UDP_L4 offload. Signed-off-by: Andrew Melnychenko Signed-off-by: David S.
Miller --- include/linux/virtio_net.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index a960de68ac69..bdf8de2cdd93 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) case VIRTIO_NET_HDR_GSO_TCPV6: return protocol == cpu_to_be16(ETH_P_IPV6); case VIRTIO_NET_HDR_GSO_UDP: + case VIRTIO_NET_HDR_GSO_UDP_L4: return protocol == cpu_to_be16(ETH_P_IP) || protocol == cpu_to_be16(ETH_P_IPV6); default: @@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: + case VIRTIO_NET_HDR_GSO_UDP_L4: skb->protocol = cpu_to_be16(ETH_P_IP); break; case VIRTIO_NET_HDR_GSO_TCPV6: @@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; + case VIRTIO_NET_HDR_GSO_UDP_L4: + gso_type = SKB_GSO_UDP_L4; + ip_proto = IPPROTO_UDP; + thlen = sizeof(struct udphdr); + break; default: return -EINVAL; } @@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP_L4) + hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) -- cgit From 983055bf839745ec2812fc55cacd96888aa0d7a6 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 1 Dec 2022 02:46:54 +0900 Subject: USB: core: export usb_cache_string() usb_cache_string() can also be useful for the drivers so export it. Signed-off-by: Vincent Mailhol Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20221130174658.29282-4-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 9ff1ad4dfad1..d2d2f41052c0 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1829,6 +1829,7 @@ static inline int usb_get_ptm_status(struct usb_device *dev, void *data) extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); +extern char *usb_cache_string(struct usb_device *udev, int index); /* wrappers that also update important state inside usbcore */ extern int usb_clear_halt(struct usb_device *dev, int pipe); -- cgit From 8a321cf7becc6c065ae595b837b826a2a81036b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 9 Dec 2022 10:21:38 -0500 Subject: net: add IFF_NO_ADDRCONF and use it in bonding to prevent ipv6 addrconf Currently, in bonding it reused the IFF_SLAVE flag and checked it in ipv6 addrconf to prevent ipv6 addrconf. However, it is not a proper flag to use for no ipv6 addrconf, for bonding it has to move IFF_SLAVE flag setting ahead of dev_open() in bond_enslave(). Also, IFF_MASTER/SLAVE are historical flags used in bonding and eql, as Jiri mentioned, the new devices like Team, Failover do not use this flag. So as Jiri suggested, this patch adds IFF_NO_ADDRCONF in priv_flags of the device to indicate no ipv6 addconf, and uses it in bonding and moves IFF_SLAVE flag setting back to its original place. 
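A minimal sketch of the driver-side usage described here (placement inside bond_enslave() is illustrative):

	/* Sketch only: mark the lower device before dev_open() so ipv6
	 * addrconf leaves it alone.
	 */
	slave_dev->priv_flags |= IFF_NO_ADDRCONF;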
Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2287cb8eb9e4..aad12a179e54 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1662,6 +1662,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN @@ -1697,7 +1698,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, - /* was IFF_LIVE_RENAME_OK */ + IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; -- cgit
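For completeness, the consumer side of the flag; the actual check lives in net/ipv6/addrconf.c, outside this include/linux diff (sketch only):

	if (dev->priv_flags & IFF_NO_ADDRCONF)
		return;	/* skip IPv6 address autoconfiguration */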