From 5afced3bf28100d81fb2fe7e98918632a08feaf5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 29 May 2020 15:05:22 +0200
Subject: writeback: Avoid skipping inode writeback

Inode's i_io_list list head is used to attach inode to several different
lists - wb->{b_dirty, b_dirty_time, b_io, b_more_io}. When flush worker
prepares a list of inodes to writeback e.g. for sync(2), it moves inodes
to b_io list. Thus it is critical for sync(2) data integrity guarantees
that inode is not requeued to any other writeback list when inode is
queued for processing by flush worker. That's the reason why
writeback_single_inode() does not touch i_io_list (unless the inode is
completely clean) and why __mark_inode_dirty() does not touch i_io_list
if I_SYNC flag is set.

However there are two flaws in the current logic:

1) When inode has only I_DIRTY_TIME set but it is already queued in b_io
list due to sync(2), concurrent __mark_inode_dirty(inode, I_DIRTY_SYNC)
can still move inode back to b_dirty list resulting in skipping
writeback of inode time stamps during sync(2).

2) When inode is on b_dirty_time list and writeback_single_inode() races
with __mark_inode_dirty() like:

writeback_single_inode()		__mark_inode_dirty(inode, I_DIRTY_PAGES)
  inode->i_state |= I_SYNC
  __writeback_single_inode()
					  inode->i_state |= I_DIRTY_PAGES;
					  if (inode->i_state & I_SYNC)
					    bail
  if (!(inode->i_state & I_DIRTY_ALL))
  - not true so nothing done

We end up with I_DIRTY_PAGES inode on b_dirty_time list and thus
standard background writeback will not writeback this inode leading to
possible dirty throttling stalls etc. (thanks to Martijn Coenen for this
analysis).

Fix these problems by tracking whether inode is queued in b_io or
b_more_io lists in a new I_SYNC_QUEUED flag. When this flag is set, we
know flush worker has queued inode and we should not touch i_io_list.
On the other hand we also know that once flush worker is done with the
inode it will requeue the inode to appropriate dirty list. When
I_SYNC_QUEUED is not set, __mark_inode_dirty() can (and must) move inode
to appropriate dirty list.

Reported-by: Martijn Coenen <maco@android.com>
Reviewed-by: Martijn Coenen <maco@android.com>
Tested-by: Martijn Coenen <maco@android.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fs.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19ef6c88c152..48556efcdcf0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2157,6 +2157,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
  *
  * I_DONTCACHE		Evict inode as soon as it is not used anymore.
  *
+ * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
+ *			Used to detect that mark_inode_dirty() should not move
+ * 			inode between dirty lists.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -2174,12 +2178,12 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 #define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
 #define I_LINKABLE		(1 << 10)
 #define I_DIRTY_TIME		(1 << 11)
-#define __I_DIRTY_TIME_EXPIRED	12
-#define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
+#define I_DIRTY_TIME_EXPIRED	(1 << 12)
 #define I_WB_SWITCH		(1 << 13)
 #define I_OVL_INUSE		(1 << 14)
 #define I_CREATING		(1 << 15)
 #define I_DONTCACHE		(1 << 16)
+#define I_SYNC_QUEUED		(1 << 17)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
-- 
cgit 


From f9cae926f35e8230330f28c7b743ad088611a8de Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 29 May 2020 16:08:58 +0200
Subject: writeback: Fix sync livelock due to b_dirty_time processing

When we are processing writeback for sync(2), move_expired_inodes()
didn't set any inode expiry value (older_than_this). This can result in
writeback never completing if there's steady stream of inodes added to
b_dirty_time list as writeback rechecks dirty lists after each writeback
round whether there's more work to be done. Fix the problem by using
sync(2) start time is inode expiry value when processing b_dirty_time
list similarly as for ordinarily dirtied inodes. This requires some
refactoring of older_than_this handling which simplifies the code
noticeably as a bonus.

Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
CC: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/trace/events/writeback.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 10f5d1fa7347..7565dcd59697 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -498,8 +498,9 @@ DEFINE_WBC_EVENT(wbc_writepage);
 TRACE_EVENT(writeback_queue_io,
 	TP_PROTO(struct bdi_writeback *wb,
 		 struct wb_writeback_work *work,
+		 unsigned long dirtied_before,
 		 int moved),
-	TP_ARGS(wb, work, moved),
+	TP_ARGS(wb, work, dirtied_before, moved),
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
 		__field(unsigned long,	older)
@@ -509,19 +510,17 @@ TRACE_EVENT(writeback_queue_io,
 		__field(ino_t,		cgroup_ino)
 	),
 	TP_fast_assign(
-		unsigned long *older_than_this = work->older_than_this;
 		strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
-		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+		__entry->older	= dirtied_before;
+		__entry->age	= (jiffies - dirtied_before) * 1000 / HZ;
 		__entry->moved	= moved;
 		__entry->reason	= work->reason;
 		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
 	),
 	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu",
 		__entry->name,
-		__entry->older,	/* older_than_this in jiffies */
-		__entry->age,	/* older_than_this in relative milliseconds */
+		__entry->older,	/* dirtied_before in jiffies */
+		__entry->age,	/* dirtied_before in relative milliseconds */
 		__entry->moved,
 		__print_symbolic(__entry->reason, WB_WORK_REASON),
 		(unsigned long)__entry->cgroup_ino
-- 
cgit 


From 5fcd57505c002efc5823a7355e21f48dd02d5a51 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 29 May 2020 16:24:43 +0200
Subject: writeback: Drop I_DIRTY_TIME_EXPIRE

The only use of I_DIRTY_TIME_EXPIRE is to detect in
__writeback_single_inode() that inode got there because flush worker
decided it's time to writeback the dirty inode time stamps (either
because we are syncing or because of age). However we can detect this
directly in __writeback_single_inode() and there's no need for the
strange propagation with I_DIRTY_TIME_EXPIRE flag.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fs.h               | 1 -
 include/trace/events/writeback.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 48556efcdcf0..45eadf5bea5d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2178,7 +2178,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 #define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
 #define I_LINKABLE		(1 << 10)
 #define I_DIRTY_TIME		(1 << 11)
-#define I_DIRTY_TIME_EXPIRED	(1 << 12)
 #define I_WB_SWITCH		(1 << 13)
 #define I_OVL_INUSE		(1 << 14)
 #define I_CREATING		(1 << 15)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 7565dcd59697..e7cbccc7c14c 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -20,7 +20,6 @@
 		{I_CLEAR,		"I_CLEAR"},		\
 		{I_SYNC,		"I_SYNC"},		\
 		{I_DIRTY_TIME,		"I_DIRTY_TIME"},	\
-		{I_DIRTY_TIME_EXPIRED,	"I_DIRTY_TIME_EXPIRED"}, \
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
-- 
cgit 


From 529a781ee07aaa58be8164d75ba5998eb7dd216c Mon Sep 17 00:00:00 2001
From: "zhangyi (F)" <yi.zhang@huawei.com>
Date: Sat, 20 Jun 2020 10:54:27 +0800
Subject: jbd2: remove unused parameter in jbd2_journal_try_to_free_buffers()

Parameter gfp_mask in jbd2_journal_try_to_free_buffers() is no longer
used after commit <536fc240e7147> ("jbd2: clean up
jbd2_journal_try_to_free_buffers()"), so just remove it.

Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Link: https://lore.kernel.org/r/20200620025427.1756360-6-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d56128df2aff..a756a4cdf939 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1380,7 +1380,7 @@ extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_invalidatepage(journal_t *,
 				struct page *, unsigned int, unsigned int);
-extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
+extern int	 jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
 extern int	 jbd2_journal_stop(handle_t *);
 extern int	 jbd2_journal_flush (journal_t *);
 extern void	 jbd2_journal_lock_updates (journal_t *);
-- 
cgit 


From ab74c7b23f3770935016e3eb3ecdf1e42b73efaa Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 15 Jul 2020 11:48:55 -0400
Subject: ext4: indicate via a block bitmap read is prefetched via a tracepoint

Modify the ext4_read_block_bitmap_load tracepoint so that it tells us
whether a block bitmap is being prefetched.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 include/trace/events/ext4.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index cc41d692ae8e..cbcd2e1a608d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1312,18 +1312,34 @@ DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
 	TP_ARGS(sb, group)
 );
 
-DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
+DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
 
 	TP_PROTO(struct super_block *sb, unsigned long group),
 
 	TP_ARGS(sb, group)
 );
 
-DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
+TRACE_EVENT(ext4_read_block_bitmap_load,
+	TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch),
 
-	TP_PROTO(struct super_block *sb, unsigned long group),
+	TP_ARGS(sb, group, prefetch),
 
-	TP_ARGS(sb, group)
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+		__field(	bool,	prefetch		)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+		__entry->prefetch = prefetch;
+	),
+
+	TP_printk("dev %d,%d group %u prefetch %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->group, __entry->prefetch)
 );
 
 TRACE_EVENT(ext4_direct_IO_enter,
-- 
cgit 


From 3d392b2676bf3199863a1e5efb2c087ad9d442a4 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 17 Jul 2020 00:14:40 -0400
Subject: ext4: add prefetch_block_bitmaps mount option

For file systems where we can afford to keep the buddy bitmaps cached,
we can speed up initial writes to large file systems by starting to
load the block allocation bitmaps as soon as the file system is
mounted.  This won't work well for _super_ large file systems, or
memory constrained systems, so we only enable this when it is
requested via a mount option.

Addresses-Google-Bug: 159488342
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 include/trace/events/ext4.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index cbcd2e1a608d..8008d2e116b9 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2742,6 +2742,50 @@ TRACE_EVENT(ext4_error,
 		  __entry->function, __entry->line)
 );
 
+TRACE_EVENT(ext4_prefetch_bitmaps,
+	    TP_PROTO(struct super_block *sb, ext4_group_t group,
+		     ext4_group_t next, unsigned int prefetch_ios),
+
+	TP_ARGS(sb, group, next, prefetch_ios),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+		__field(	__u32,	next			)
+		__field(	__u32,	ios			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+		__entry->next	= next;
+		__entry->ios	= prefetch_ios;
+	),
+
+	TP_printk("dev %d,%d group %u next %u ios %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->group, __entry->next, __entry->ios)
+);
+
+TRACE_EVENT(ext4_lazy_itable_init,
+	    TP_PROTO(struct super_block *sb, ext4_group_t group),
+
+	TP_ARGS(sb, group),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u32,	group			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+	),
+
+	TP_printk("dev %d,%d group %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
-- 
cgit 


From 2404b73c3f1a5f15726c6ecd226b56f6f992767f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 10 Aug 2020 13:52:15 +0200
Subject: netfilter: avoid ipv6 -> nf_defrag_ipv6 module dependency

nf_ct_frag6_gather is part of nf_defrag_ipv6.ko, not ipv6 core.

The current use of the netfilter ipv6 stub indirections  causes a module
dependency between ipv6 and nf_defrag_ipv6.

This prevents nf_defrag_ipv6 module from being removed because ipv6 can't
be unloaded.

Remove the indirection and always use a direct call.  This creates a
depency from nf_conntrack_bridge to nf_defrag_ipv6 instead:

modinfo nf_conntrack
depends:        nf_conntrack,nf_defrag_ipv6,bridge

.. and nf_conntrack already depends on nf_defrag_ipv6 anyway.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index aac42c28fe62..9b67394471e1 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -58,7 +58,6 @@ struct nf_ipv6_ops {
 			int (*output)(struct net *, struct sock *, struct sk_buff *));
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
 #if IS_MODULE(CONFIG_IPV6)
-	int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
 	int (*br_fragment)(struct net *net, struct sock *sk,
 			   struct sk_buff *skb,
 			   struct nf_bridge_frag_data *data,
@@ -117,23 +116,6 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
 
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
-static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
-				    u32 user)
-{
-#if IS_MODULE(CONFIG_IPV6)
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return 1;
-
-	return v6_ops->br_defrag(net, skb, user);
-#elif IS_BUILTIN(CONFIG_IPV6)
-	return nf_ct_frag6_gather(net, skb, user);
-#else
-	return 1;
-#endif
-}
-
 int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		    struct nf_bridge_frag_data *data,
 		    int (*output)(struct net *, struct sock *sk,
-- 
cgit 


From 9420139f516d7fbc248ce17f35275cb005ed98ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 Aug 2020 12:26:24 +0200
Subject: dma-pool: fix coherent pool allocations for IOMMU mappings

When allocating coherent pool memory for an IOMMU mapping we don't care
about the DMA mask.  Move the guess for the initial GFP mask into the
dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer
argument so that it doesn't get applied to the IOMMU case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Amit Pundir <amit.pundir@linaro.org>
---
 include/linux/dma-direct.h  | 3 ---
 include/linux/dma-mapping.h | 5 +++--
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 5a3ce2a24794..6e87225600ae 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -73,9 +73,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
 }
 
 u64 dma_direct_get_required_mask(struct device *dev);
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
-				  u64 *phys_mask);
-bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 016b96b384bd..52635e91143b 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -522,8 +522,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
 			pgprot_t prot, const void *caller);
 void dma_common_free_remap(void *cpu_addr, size_t size);
 
-void *dma_alloc_from_pool(struct device *dev, size_t size,
-			  struct page **ret_page, gfp_t flags);
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+		void **cpu_addr, gfp_t flags,
+		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t));
 bool dma_free_from_pool(struct device *dev, void *start, size_t size);
 
 int
-- 
cgit 


From 53bf2b0e4e4c1ff0a957474237f9dcd20036ca54 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Thu, 6 Aug 2020 13:18:16 +0530
Subject: firmware: ti_sci: Add support for getting resource with subtype

With SYSFW ABI 3.0 changes, interrupts coming out of an interrupt
controller is identified by a type and it is consistent across SoCs.
Similarly global events for Interrupt aggregator. So add an API to get
resource range using a resource type.

Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20200806074826.24607-4-lokeshvutla@ti.com
---
 include/linux/soc/ti/ti_sci_protocol.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 49c5d29cd33c..cf27b080e148 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -220,6 +220,9 @@ struct ti_sci_rm_core_ops {
 				    u16 *range_start, u16 *range_num);
 };
 
+#define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT		0
+#define TI_SCI_RESASG_SUBTYPE_IA_VINT		0xa
+#define TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT	0xd
 /**
  * struct ti_sci_rm_irq_ops: IRQ management operations
  * @set_irq:		Set an IRQ route between the requested source
@@ -556,6 +559,9 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res);
 struct ti_sci_resource *
 devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle,
 			    struct device *dev, u32 dev_id, char *of_prop);
+struct ti_sci_resource *
+devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev,
+			 u32 dev_id, u32 sub_type);
 
 #else	/* CONFIG_TI_SCI_PROTOCOL */
 
@@ -609,6 +615,13 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle,
 {
 	return ERR_PTR(-EINVAL);
 }
+
+static inline struct ti_sci_resource *
+devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev,
+			 u32 dev_id, u32 sub_type);
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif	/* CONFIG_TI_SCI_PROTOCOL */
 
 #endif	/* __TISCI_PROTOCOL_H */
-- 
cgit 


From 29e44f4535faa71a70827af3639b5e6762d8f02a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 17 Aug 2020 11:07:28 +0100
Subject: watch_queue: Limit the number of watches a user can hold

Impose a limit on the number of watches that a user can hold so that
they can't use this mechanism to fill up all the available memory.

This is done by putting a counter in user_struct that's incremented when
a watch is allocated and decreased when it is released.  If the number
exceeds the RLIMIT_NOFILE limit, the watch is rejected with EAGAIN.

This can be tested by the following means:

 (1) Create a watch queue and attach it to fd 5 in the program given - in
     this case, bash:

	keyctl watch_session /tmp/nlog /tmp/gclog 5 bash

 (2) In the shell, set the maximum number of files to, say, 99:

	ulimit -n 99

 (3) Add 200 keyrings:

	for ((i=0; i<200; i++)); do keyctl newring a$i @s || break; done

 (4) Try to watch all of the keyrings:

	for ((i=0; i<200; i++)); do echo $i; keyctl watch_add 5 %:a$i || break; done

     This should fail when the number of watches belonging to the user hits
     99.

 (5) Remove all the keyrings and all of those watches should go away:

	for ((i=0; i<200; i++)); do keyctl unlink %:a$i; done

 (6) Kill off the watch queue by exiting the shell spawned by
     watch_session.

Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/user.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 917d88edb7b9..a8ec3b6093fc 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -36,6 +36,9 @@ struct user_struct {
     defined(CONFIG_NET) || defined(CONFIG_IO_URING)
 	atomic_long_t locked_vm;
 #endif
+#ifdef CONFIG_WATCH_QUEUE
+	atomic_t nr_watches;	/* The number of watches this user currently has */
+#endif
 
 	/* Miscellaneous per-user rate limit */
 	struct ratelimit_state ratelimit;
-- 
cgit 


From 77ef38574beb3e0b414db48e9c0f04633df68ba6 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@intel.com>
Date: Fri, 14 Aug 2020 11:38:42 +0200
Subject: drm/modeset-lock: Take the modeset BKL for legacy drivers

This fell off in the conversion in

commit 9bcaa3fe58ab7559e71df798bcff6e0795158695
Author: Michal Orzel <michalorzel.eng@gmail.com>
Date:   Tue Apr 28 19:10:04 2020 +0200

    drm: Replace drm_modeset_lock/unlock_all with DRM_MODESET_LOCK_ALL_* helpers

but it's caught by the drm_warn_on_modeset_not_all_locked() that the
legacy modeset code uses. Since this is the bkl and it's unclear
what's all protected, play it safe and grab it again for legacy
drivers.

Unfortunately this means we need to sprinkle a few more #includes
around.

Also we need to add the drm_device as a parameter to the _END macro.

Finally remove the mute_lock() from setcrtc, since that's now done by
the macro.

Cc: Alex Deucher <alexdeucher@gmail.com>
References: https://gitlab.freedesktop.org/drm/amd/-/issues/1224
Fixes: 9bcaa3fe58ab ("drm: Replace drm_modeset_lock/unlock_all with DRM_MODESET_LOCK_ALL_* helpers")
Cc: Michal Orzel <michalorzel.eng@gmail.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v5.8+
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200814093842.3048472-1-daniel.vetter@ffwll.ch
---
 include/drm/drm_modeset_lock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
index 4fc9a43ac45a..aafd07388eb7 100644
--- a/include/drm/drm_modeset_lock.h
+++ b/include/drm/drm_modeset_lock.h
@@ -164,6 +164,8 @@ int drm_modeset_lock_all_ctx(struct drm_device *dev,
  * is 0, so no error checking is necessary
  */
 #define DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, flags, ret)		\
+	if (!drm_drv_uses_atomic_modeset(dev))				\
+		mutex_lock(&dev->mode_config.mutex);			\
 	drm_modeset_acquire_init(&ctx, flags);				\
 modeset_lock_retry:							\
 	ret = drm_modeset_lock_all_ctx(dev, &ctx);			\
@@ -172,6 +174,7 @@ modeset_lock_retry:							\
 
 /**
  * DRM_MODESET_LOCK_ALL_END - Helper to release and cleanup modeset locks
+ * @dev: drm device
  * @ctx: local modeset acquire context, will be dereferenced
  * @ret: local ret/err/etc variable to track error status
  *
@@ -188,7 +191,7 @@ modeset_lock_retry:							\
  * to that failure. In both of these cases the code between BEGIN/END will not
  * be run, so the failure will reflect the inability to grab the locks.
  */
-#define DRM_MODESET_LOCK_ALL_END(ctx, ret)				\
+#define DRM_MODESET_LOCK_ALL_END(dev, ctx, ret)				\
 modeset_lock_fail:							\
 	if (ret == -EDEADLK) {						\
 		ret = drm_modeset_backoff(&ctx);			\
@@ -196,6 +199,8 @@ modeset_lock_fail:							\
 			goto modeset_lock_retry;			\
 	}								\
 	drm_modeset_drop_locks(&ctx);					\
-	drm_modeset_acquire_fini(&ctx);
+	drm_modeset_acquire_fini(&ctx);					\
+	if (!drm_drv_uses_atomic_modeset(dev))				\
+		mutex_unlock(&dev->mode_config.mutex);
 
 #endif /* DRM_MODESET_LOCK_H_ */
-- 
cgit 


From 0b76e642f9ad2471e899e2dd71b9543b7e85e9f6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 16 Aug 2020 15:25:49 -0700
Subject: phylink: <linux/phylink.h>: fix function prototype kernel-doc warning

Fix a kernel-doc warning for the pcs_config() function prototype:

../include/linux/phylink.h:406: warning: Excess function parameter 'permit_pause_to_mac' description in 'pcs_config'

Fixes: 7137e18f6f88 ("net: phylink: add struct phylink_pcs")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index a8e876317e25..c36fb41a7d90 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -402,7 +402,8 @@ void pcs_get_state(struct phylink_pcs *pcs,
  * For most 10GBASE-R, there is no advertisement.
  */
 int pcs_config(struct phylink_pcs *pcs, unsigned int mode,
-	       phy_interface_t interface, const unsigned long *advertising);
+	       phy_interface_t interface, const unsigned long *advertising,
+	       bool permit_pause_to_mac);
 
 /**
  * pcs_an_restart() - restart 802.3z BaseX autonegotiation
-- 
cgit 


From bd05220c7be3356046861c317d9c287ca50445ba Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Tue, 11 Aug 2020 19:24:57 +0100
Subject: arch/ia64: Restore arch-specific pgd_offset_k implementation

IA-64 is special and treats pgd_offset_k() differently to pgd_offset(),
using different formulae to calculate the indices into the kernel and user
PGDs.  The index into the user PGDs takes into account the region number,
but the index into the kernel (init_mm) PGD always assumes a predefined
kernel region number. Commit 974b9b2c68f3 ("mm: consolidate pte_index() and
pte_offset_*() definitions") made IA-64 use a generic pgd_offset_k() which
incorrectly used pgd_index() for kernel page tables.  As a result, the
index into the kernel PGD was going out of bounds and the kernel hung
during early boot.

Allow overrides of pgd_offset_k() and override it on IA-64 with the old
implementation that will correctly index the kernel PGD.

Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions")
Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com>
Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a124c21e3204..e8cbc2e795d5 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -117,7 +117,9 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
  */
+#ifndef pgd_offset_k
 #define pgd_offset_k(address)		pgd_offset(&init_mm, (address))
+#endif
 
 /*
  * In many cases it is known that a virtual address is mapped at PMD or PTE
-- 
cgit 


From 4ef1a7cb08e94da1f2f2a34ee6cefe7ae142dc98 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 Aug 2020 14:30:49 +0800
Subject: ipv6: some fixes for ipv6_dev_find()

This patch is to do 3 things for ipv6_dev_find():

  As David A. noticed,

  - rt6_lookup() is not really needed. Different from __ip_dev_find(),
    ipv6_dev_find() doesn't have a compatibility problem, so remove it.

  As Hideaki suggested,

  - "valid" (non-tentative) check for the address is also needed.
    ipv6_chk_addr() calls ipv6_chk_addr_and_flags(), which will
    traverse the address hash list, but it's heavy to be called
    inside ipv6_dev_find(). This patch is to reuse the code of
    ipv6_chk_addr_and_flags() for ipv6_dev_find().

  - dev parameter is passed into ipv6_dev_find(), as link-local
    addresses from user space has sin6_scope_id set and the dev
    lookup needs it.

Fixes: 81f6cb31222d ("ipv6: add ipv6_dev_find()")
Suggested-by: YOSHIFUJI Hideaki <hideaki.yoshifuji@miraclelinux.com>
Reported-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index ba3f6c15ad2b..18f783dcd55f 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -97,7 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
 
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
 
-struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr);
+struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr,
+				 struct net_device *dev);
 
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
 				     const struct in6_addr *addr,
-- 
cgit 


From 27bc446e2def38db3244a6eb4bb1d6312936610a Mon Sep 17 00:00:00 2001
From: brookxu <brookxu.cn@gmail.com>
Date: Mon, 17 Aug 2020 15:36:15 +0800
Subject: ext4: limit the length of per-inode prealloc list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the scenario of writing sparse files, the per-inode prealloc list may
be very long, resulting in high overhead for ext4_mb_use_preallocated().
To circumvent this problem, we limit the maximum length of per-inode
prealloc list to 512 and allow users to modify it.

After patching, we observed that the sys ratio of cpu has dropped, and
the system throughput has increased significantly. We created a process
to write the sparse file, and the running time of the process on the
fixed kernel was significantly reduced, as follows:

Running time on unfixed kernel：
[root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
real    0m2.051s
user    0m0.008s
sys     0m2.026s

Running time on fixed kernel：
[root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
real    0m0.471s
user    0m0.004s
sys     0m0.395s

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Link: https://lore.kernel.org/r/d7a98178-056b-6db5-6bce-4ead23f4a257@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 8008d2e116b9..4c8b99ec8606 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -746,24 +746,29 @@ TRACE_EVENT(ext4_mb_release_group_pa,
 );
 
 TRACE_EVENT(ext4_discard_preallocations,
-	TP_PROTO(struct inode *inode),
+	TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
 
-	TP_ARGS(inode),
+	TP_ARGS(inode, len, needed),
 
 	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
+		__field(	dev_t,		dev		)
+		__field(	ino_t,		ino		)
+		__field(	unsigned int,	len		)
+		__field(	unsigned int,	needed		)
 
 	),
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
+		__entry->len	= len;
+		__entry->needed	= needed;
 	),
 
-	TP_printk("dev %d,%d ino %lu",
+	TP_printk("dev %d,%d ino %lu len: %u needed %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino)
+		  (unsigned long) __entry->ino, __entry->len,
+		  __entry->needed)
 );
 
 TRACE_EVENT(ext4_mb_discard_preallocations,
-- 
cgit 


From 2ac6795fcc085e8d03649f1bbd0d70aaff612cad Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@wdc.com>
Date: Mon, 17 Aug 2020 18:12:49 +0530
Subject: clocksource/drivers: Add CLINT timer driver

We add a separate CLINT timer driver for Linux RISC-V M-mode (i.e.
RISC-V NoMMU kernel).

The CLINT MMIO device provides three things:
1. 64bit free running counter register
2. 64bit per-CPU time compare registers
3. 32bit per-CPU inter-processor interrupt registers

Unlike other timer devices, CLINT provides IPI registers along with
timer registers. To use CLINT IPI registers, the CLINT timer driver
provides IPI related callbacks to arch/riscv.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Tested-by: Emil Renner Berhing <kernel@esmil.dk>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index a2710e654b64..3215023d4852 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -132,6 +132,7 @@ enum cpuhp_state {
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
 	CPUHP_AP_RISCV_TIMER_STARTING,
+	CPUHP_AP_CLINT_TIMER_STARTING,
 	CPUHP_AP_CSKY_TIMER_STARTING,
 	CPUHP_AP_HYPERV_TIMER_STARTING,
 	CPUHP_AP_KVM_STARTING,
-- 
cgit 


From b16fc097bc283184cde40e5b30d15705e1590410 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 21 Aug 2020 15:36:42 +0200
Subject: bpf: Fix two typos in uapi/linux/bpf.h

Also remove trailing whitespaces in bpf_skb_get_tunnel_key example code.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200821133642.18870-1-tklauser@distanz.ch
---
 include/uapi/linux/bpf.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0480f893facd..b6238b2209b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -767,7 +767,7 @@ union bpf_attr {
  *
  * 		Also, note that **bpf_trace_printk**\ () is slow, and should
  * 		only be used for debugging purposes. For this reason, a notice
- * 		bloc (spanning several lines) is printed to kernel logs and
+ * 		block (spanning several lines) is printed to kernel logs and
  * 		states that the helper should not be used "for production use"
  * 		the first time this helper is used (or more precisely, when
  * 		**trace_printk**\ () buffers are allocated). For passing values
@@ -1033,14 +1033,14 @@ union bpf_attr {
  *
  * 			int ret;
  * 			struct bpf_tunnel_key key = {};
- * 			
+ *
  * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  * 			if (ret < 0)
  * 				return TC_ACT_SHOT;	// drop packet
- * 			
+ *
  * 			if (key.remote_ipv4 != 0x0a000001)
  * 				return TC_ACT_SHOT;	// drop packet
- * 			
+ *
  * 			return TC_ACT_OK;		// accept packet
  *
  * 		This interface can also be used with all encapsulation devices
@@ -1147,7 +1147,7 @@ union bpf_attr {
  * 	Description
  * 		Retrieve the realm or the route, that is to say the
  * 		**tclassid** field of the destination for the *skb*. The
- * 		indentifier retrieved is a user-provided tag, similar to the
+ * 		identifier retrieved is a user-provided tag, similar to the
  * 		one used with the net_cls cgroup (see description for
  * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
  * 		held by a route (a destination entry), not by a task.
-- 
cgit 


From df561f6688fef775baa341a0f5d960becd248b11 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Sun, 23 Aug 2020 17:36:59 -0500
Subject: treewide: Use fallthrough pseudo-keyword

Replace the existing /* fall through */ comments and its variants with
the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary
fall-through markings when it is the case.

[1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/compat.h       |  6 +++---
 include/linux/filter.h       |  2 +-
 include/linux/jhash.h        | 26 +++++++++++++-------------
 include/linux/mm.h           |  9 ++++++---
 include/linux/signal.h       | 12 ++++++------
 include/linux/skbuff.h       | 12 ++++++------
 include/math-emu/op-common.h | 10 +++++-----
 7 files changed, 40 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index d38c4d7e83bd..b354ce58966e 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -429,11 +429,11 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
 	compat_sigset_t v;
 	switch (_NSIG_WORDS) {
 	case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
-		/* fall through */
+		fallthrough;
 	case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
-		/* fall through */
+		fallthrough;
 	case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
-		/* fall through */
+		fallthrough;
 	case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
 	}
 	return copy_to_user(compat, &v, size) ? -EFAULT : 0;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0a355b005bf4..ebfb7cfb65f1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1200,7 +1200,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
 		BPF_ANCILLARY(RANDOM);
 		BPF_ANCILLARY(VLAN_TPID);
 		}
-		/* Fallthrough. */
+		fallthrough;
 	default:
 		return ftest->code;
 	}
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 19ddd43aee68..cfb62e9f37be 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -86,17 +86,17 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
 	}
 	/* Last block: affect all 32 bits of (c) */
 	switch (length) {
-	case 12: c += (u32)k[11]<<24;	/* fall through */
-	case 11: c += (u32)k[10]<<16;	/* fall through */
-	case 10: c += (u32)k[9]<<8;	/* fall through */
-	case 9:  c += k[8];		/* fall through */
-	case 8:  b += (u32)k[7]<<24;	/* fall through */
-	case 7:  b += (u32)k[6]<<16;	/* fall through */
-	case 6:  b += (u32)k[5]<<8;	/* fall through */
-	case 5:  b += k[4];		/* fall through */
-	case 4:  a += (u32)k[3]<<24;	/* fall through */
-	case 3:  a += (u32)k[2]<<16;	/* fall through */
-	case 2:  a += (u32)k[1]<<8;	/* fall through */
+	case 12: c += (u32)k[11]<<24;	fallthrough;
+	case 11: c += (u32)k[10]<<16;	fallthrough;
+	case 10: c += (u32)k[9]<<8;	fallthrough;
+	case 9:  c += k[8];		fallthrough;
+	case 8:  b += (u32)k[7]<<24;	fallthrough;
+	case 7:  b += (u32)k[6]<<16;	fallthrough;
+	case 6:  b += (u32)k[5]<<8;	fallthrough;
+	case 5:  b += k[4];		fallthrough;
+	case 4:  a += (u32)k[3]<<24;	fallthrough;
+	case 3:  a += (u32)k[2]<<16;	fallthrough;
+	case 2:  a += (u32)k[1]<<8;	fallthrough;
 	case 1:  a += k[0];
 		 __jhash_final(a, b, c);
 	case 0: /* Nothing left to add */
@@ -132,8 +132,8 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 
 	/* Handle the last 3 u32's */
 	switch (length) {
-	case 3: c += k[2];	/* fall through */
-	case 2: b += k[1];	/* fall through */
+	case 3: c += k[2];	fallthrough;
+	case 2: b += k[1];	fallthrough;
 	case 1: a += k[0];
 		__jhash_final(a, b, c);
 	case 0:	/* Nothing left to add */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1983e08f5906..97c83773b6f0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,11 +157,14 @@ static inline void __mm_zero_struct_page(struct page *page)
 
 	switch (sizeof(struct page)) {
 	case 80:
-		_pp[9] = 0;	/* fallthrough */
+		_pp[9] = 0;
+		fallthrough;
 	case 72:
-		_pp[8] = 0;	/* fallthrough */
+		_pp[8] = 0;
+		fallthrough;
 	case 64:
-		_pp[7] = 0;	/* fallthrough */
+		_pp[7] = 0;
+		fallthrough;
 	case 56:
 		_pp[6] = 0;
 		_pp[5] = 0;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 6bb1a3f0258c..7bbc0e9cf084 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -137,11 +137,11 @@ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \
 		b3 = b->sig[3]; b2 = b->sig[2];				\
 		r->sig[3] = op(a3, b3);					\
 		r->sig[2] = op(a2, b2);					\
-		/* fall through */					\
+		fallthrough;						\
 	case 2:								\
 		a1 = a->sig[1]; b1 = b->sig[1];				\
 		r->sig[1] = op(a1, b1);					\
-		/* fall through */					\
+		fallthrough;						\
 	case 1:								\
 		a0 = a->sig[0]; b0 = b->sig[0];				\
 		r->sig[0] = op(a0, b0);					\
@@ -171,9 +171,9 @@ static inline void name(sigset_t *set)					\
 	switch (_NSIG_WORDS) {						\
 	case 4:	set->sig[3] = op(set->sig[3]);				\
 		set->sig[2] = op(set->sig[2]);				\
-		/* fall through */					\
+		fallthrough;						\
 	case 2:	set->sig[1] = op(set->sig[1]);				\
-		/* fall through */					\
+		fallthrough;						\
 	case 1:	set->sig[0] = op(set->sig[0]);				\
 		    break;						\
 	default:							\
@@ -194,7 +194,7 @@ static inline void sigemptyset(sigset_t *set)
 		memset(set, 0, sizeof(sigset_t));
 		break;
 	case 2: set->sig[1] = 0;
-		/* fall through */
+		fallthrough;
 	case 1:	set->sig[0] = 0;
 		break;
 	}
@@ -207,7 +207,7 @@ static inline void sigfillset(sigset_t *set)
 		memset(set, -1, sizeof(sigset_t));
 		break;
 	case 2: set->sig[1] = -1;
-		/* fall through */
+		fallthrough;
 	case 1:	set->sig[0] = -1;
 		break;
 	}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 46881d902124..ab57cf787c1f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3745,19 +3745,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
 #define __it(x, op) (x -= sizeof(u##op))
 #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
 	case 32: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case 24: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case 16: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case  8: diffs |= __it_diff(a, b, 64);
 		break;
 	case 28: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case 20: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case 12: diffs |= __it_diff(a, b, 64);
-		 /* fall through */
+		fallthrough;
 	case  4: diffs |= __it_diff(a, b, 32);
 		break;
 	}
diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h
index adcc6a97db61..143568d64b20 100644
--- a/include/math-emu/op-common.h
+++ b/include/math-emu/op-common.h
@@ -308,7 +308,7 @@ do {									     \
 									     \
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):			     \
     R##_e = X##_e;							     \
-	  /* Fall through */						     \
+	fallthrough;							     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):			     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):				     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):				     \
@@ -319,7 +319,7 @@ do {									     \
 									     \
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):			     \
     R##_e = Y##_e;							     \
-	  /* Fall through */						     \
+	fallthrough;							     \
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):			     \
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):				     \
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):				     \
@@ -417,7 +417,7 @@ do {							\
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
     R##_s = X##_s;					\
-	/* Fall through */				\
+	  fallthrough;					\
 							\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
@@ -431,7 +431,7 @@ do {							\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
     R##_s = Y##_s;					\
-	/* Fall through */				\
+	  fallthrough;					\
 							\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
@@ -497,7 +497,7 @@ do {							\
 							\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
     FP_SET_EXCEPTION(FP_EX_DIVZERO);			\
-	  /* Fall through */				\
+	fallthrough;					\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
     R##_c = FP_CLS_INF;					\
-- 
cgit 


From 12564485ed8caac3c18572793ec01330792c7191 Mon Sep 17 00:00:00 2001
From: Shawn Anastasio <shawn@anastas.io>
Date: Fri, 21 Aug 2020 13:55:56 -0500
Subject: Revert "powerpc/64s: Remove PROT_SAO support"

This reverts commit 5c9fa16e8abd342ce04dc830c1ebb2a03abf6c05.

Since PROT_SAO can still be useful for certain classes of software,
reintroduce it. Concerns about guest migration for LPARs using SAO
will be addressed next.

Signed-off-by: Shawn Anastasio <shawn@anastas.io>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200821185558.35561-2-shawn@anastas.io
---
 include/linux/mm.h             | 2 ++
 include/trace/events/mmflags.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1983e08f5906..5abe6df4247e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -321,6 +321,8 @@ extern unsigned int kobjsize(const void *objp);
 
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
+#elif defined(CONFIG_PPC)
+# define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 939092dbcb8b..5fb752034386 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -114,6 +114,8 @@ IF_HAVE_PG_IDLE(PG_idle,		"idle"		)
 
 #if defined(CONFIG_X86)
 #define __VM_ARCH_SPECIFIC_1 {VM_PAT,     "pat"           }
+#elif defined(CONFIG_PPC)
+#define __VM_ARCH_SPECIFIC_1 {VM_SAO,     "sao"           }
 #elif defined(CONFIG_PARISC) || defined(CONFIG_IA64)
 #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP,	"growsup"	}
 #elif !defined(CONFIG_MMU)
-- 
cgit 


From f062f025fc3a4fae3e6a50d13fb1fafb11900fa7 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 19 Aug 2020 10:50:08 +0200
Subject: libceph: add __maybe_unused to DEFINE_CEPH_FEATURE

Avoid -Wunused-const-variable warnings for "make W=1".

Reported-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/ceph/ceph_features.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index fcd84e8d88f4..999636d53cf2 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -11,14 +11,14 @@
 #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
 
 #define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
-	static const uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
-	static const uint64_t CEPH_FEATUREMASK_##name =			\
+	static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit);		\
+	static const uint64_t __maybe_unused CEPH_FEATUREMASK_##name =			\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /* this bit is ignored but still advertised by release *when* */
 #define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
-	static const uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
-	static const uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+	static const uint64_t __maybe_unused DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit);	\
+	static const uint64_t __maybe_unused DEPRECATED_CEPH_FEATUREMASK_##name =		\
 		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /*
-- 
cgit 


From c94a88f341c9b8f05d8639f62bb5d95936f881cd Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 20 Aug 2020 19:20:46 +0200
Subject: sched: Use __always_inline on is_idle_task()

is_idle_task() may be used from noinstr functions such as
irqentry_enter(). Since the compiler is free to not inline regular
inline functions, switch to using __always_inline.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200820172046.GA177701@elver.google.com
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd930efd3..afe01e232935 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1666,7 +1666,7 @@ extern struct task_struct *idle_task(int cpu);
  *
  * Return: 1 if @p is an idle task. 0 otherwise.
  */
-static inline bool is_idle_task(const struct task_struct *p)
+static __always_inline bool is_idle_task(const struct task_struct *p)
 {
 	return !!(p->flags & PF_IDLE);
 }
-- 
cgit 


From fddf9055a60dfcc97bda5ef03c8fa4108ed555c5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 20 Aug 2020 09:13:30 +0200
Subject: lockdep: Use raw_cpu_*() for per-cpu variables

Sven reported that commit a21ee6055c30 ("lockdep: Change
hardirq{s_enabled,_context} to per-cpu variables") caused trouble on
s390 because their this_cpu_*() primitives disable preemption which
then lands back tracing.

On the one hand, per-cpu ops should use preempt_*able_notrace() and
raw_local_irq_*(), on the other hand, we can trivialy use raw_cpu_*()
ops for this.

Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
Reported-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200821085348.192346882@infradead.org
---
 include/linux/irqflags.h |  6 +++---
 include/linux/lockdep.h  | 18 +++++++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index bd5c55755447..d7e50a215ea9 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -53,13 +53,13 @@ DECLARE_PER_CPU(int, hardirq_context);
   extern void trace_hardirqs_off_finish(void);
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context()	(this_cpu_read(hardirq_context))
+# define lockdep_hardirq_context()	(raw_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)	((p)->softirq_context)
 # define lockdep_hardirqs_enabled()	(this_cpu_read(hardirqs_enabled))
 # define lockdep_softirqs_enabled(p)	((p)->softirqs_enabled)
 # define lockdep_hardirq_enter()			\
 do {							\
-	if (this_cpu_inc_return(hardirq_context) == 1)	\
+	if (__this_cpu_inc_return(hardirq_context) == 1)\
 		current->hardirq_threaded = 0;		\
 } while (0)
 # define lockdep_hardirq_threaded()		\
@@ -68,7 +68,7 @@ do {						\
 } while (0)
 # define lockdep_hardirq_exit()			\
 do {						\
-	this_cpu_dec(hardirq_context);		\
+	__this_cpu_dec(hardirq_context);	\
 } while (0)
 # define lockdep_softirq_enter()		\
 do {						\
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 62a382d1845b..6a584b3e5c74 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -535,19 +535,27 @@ do {									\
 DECLARE_PER_CPU(int, hardirqs_enabled);
 DECLARE_PER_CPU(int, hardirq_context);
 
+/*
+ * The below lockdep_assert_*() macros use raw_cpu_read() to access the above
+ * per-cpu variables. This is required because this_cpu_read() will potentially
+ * call into preempt/irq-disable and that obviously isn't right. This is also
+ * correct because when IRQs are enabled, it doesn't matter if we accidentally
+ * read the value from our previous CPU.
+ */
+
 #define lockdep_assert_irqs_enabled()					\
 do {									\
-	WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled));	\
+	WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled));	\
 } while (0)
 
 #define lockdep_assert_irqs_disabled()					\
 do {									\
-	WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled));	\
+	WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled));	\
 } while (0)
 
 #define lockdep_assert_in_irq()						\
 do {									\
-	WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context));	\
+	WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context));	\
 } while (0)
 
 #define lockdep_assert_preemption_enabled()				\
@@ -555,7 +563,7 @@ do {									\
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
 		     debug_locks			&&		\
 		     (preempt_count() != 0		||		\
-		      !this_cpu_read(hardirqs_enabled)));		\
+		      !raw_cpu_read(hardirqs_enabled)));		\
 } while (0)
 
 #define lockdep_assert_preemption_disabled()				\
@@ -563,7 +571,7 @@ do {									\
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
 		     debug_locks			&&		\
 		     (preempt_count() == 0		&&		\
-		      this_cpu_read(hardirqs_enabled)));		\
+		      raw_cpu_read(hardirqs_enabled)));			\
 } while (0)
 
 #else
-- 
cgit 


From bf9282dc26e7fe2a0736edc568762f0f05d12416 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 12 Aug 2020 12:22:17 +0200
Subject: cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic

This allows moving the leave_mm() call into generic code before
rcu_idle_enter(). Gets rid of more trace_*_rcuidle() users.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Marco Elver <elver@google.com>
Link: https://lkml.kernel.org/r/20200821085348.369441600@infradead.org
---
 include/linux/cpuidle.h     | 13 +++++++------
 include/linux/mmu_context.h |  5 +++++
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b65909ae4e20..75895e6363b8 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -75,12 +75,13 @@ struct cpuidle_state {
 };
 
 /* Idle State Flags */
-#define CPUIDLE_FLAG_NONE       (0x00)
-#define CPUIDLE_FLAG_POLLING	BIT(0) /* polling state */
-#define CPUIDLE_FLAG_COUPLED	BIT(1) /* state applies to multiple cpus */
-#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */
-#define CPUIDLE_FLAG_UNUSABLE	BIT(3) /* avoid using this state */
-#define CPUIDLE_FLAG_OFF	BIT(4) /* disable this state by default */
+#define CPUIDLE_FLAG_NONE       	(0x00)
+#define CPUIDLE_FLAG_POLLING		BIT(0) /* polling state */
+#define CPUIDLE_FLAG_COUPLED		BIT(1) /* state applies to multiple cpus */
+#define CPUIDLE_FLAG_TIMER_STOP 	BIT(2) /* timer is stopped on this state */
+#define CPUIDLE_FLAG_UNUSABLE		BIT(3) /* avoid using this state */
+#define CPUIDLE_FLAG_OFF		BIT(4) /* disable this state by default */
+#define CPUIDLE_FLAG_TLB_FLUSHED	BIT(5) /* idle-state flushes TLBs */
 
 struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index c51a84132d7c..03dee12d2b61 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -3,10 +3,15 @@
 #define _LINUX_MMU_CONTEXT_H
 
 #include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 /* Architectures that care about IRQ state in switch_mm can override this. */
 #ifndef switch_mm_irqs_off
 # define switch_mm_irqs_off switch_mm
 #endif
 
+#ifndef leave_mm
+static inline void leave_mm(int cpu) { }
+#endif
+
 #endif
-- 
cgit 


From 00b0ed2d4997af6d0a93edef820386951fd66d94 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 12 Aug 2020 19:28:06 +0200
Subject: locking/lockdep: Cleanup

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Marco Elver <elver@google.com>
Link: https://lkml.kernel.org/r/20200821085348.546087214@infradead.org
---
 include/linux/irqflags.h | 54 +++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index d7e50a215ea9..00d553d77911 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -49,10 +49,11 @@ struct irqtrace_events {
 DECLARE_PER_CPU(int, hardirqs_enabled);
 DECLARE_PER_CPU(int, hardirq_context);
 
-  extern void trace_hardirqs_on_prepare(void);
-  extern void trace_hardirqs_off_finish(void);
-  extern void trace_hardirqs_on(void);
-  extern void trace_hardirqs_off(void);
+extern void trace_hardirqs_on_prepare(void);
+extern void trace_hardirqs_off_finish(void);
+extern void trace_hardirqs_on(void);
+extern void trace_hardirqs_off(void);
+
 # define lockdep_hardirq_context()	(raw_cpu_read(hardirq_context))
 # define lockdep_softirq_context(p)	((p)->softirq_context)
 # define lockdep_hardirqs_enabled()	(this_cpu_read(hardirqs_enabled))
@@ -120,17 +121,17 @@ do {						\
 #else
 # define trace_hardirqs_on_prepare()		do { } while (0)
 # define trace_hardirqs_off_finish()		do { } while (0)
-# define trace_hardirqs_on()		do { } while (0)
-# define trace_hardirqs_off()		do { } while (0)
-# define lockdep_hardirq_context()	0
-# define lockdep_softirq_context(p)	0
-# define lockdep_hardirqs_enabled()	0
-# define lockdep_softirqs_enabled(p)	0
-# define lockdep_hardirq_enter()	do { } while (0)
-# define lockdep_hardirq_threaded()	do { } while (0)
-# define lockdep_hardirq_exit()		do { } while (0)
-# define lockdep_softirq_enter()	do { } while (0)
-# define lockdep_softirq_exit()		do { } while (0)
+# define trace_hardirqs_on()			do { } while (0)
+# define trace_hardirqs_off()			do { } while (0)
+# define lockdep_hardirq_context()		0
+# define lockdep_softirq_context(p)		0
+# define lockdep_hardirqs_enabled()		0
+# define lockdep_softirqs_enabled(p)		0
+# define lockdep_hardirq_enter()		do { } while (0)
+# define lockdep_hardirq_threaded()		do { } while (0)
+# define lockdep_hardirq_exit()			do { } while (0)
+# define lockdep_softirq_enter()		do { } while (0)
+# define lockdep_softirq_exit()			do { } while (0)
 # define lockdep_hrtimer_enter(__hrtimer)	false
 # define lockdep_hrtimer_exit(__context)	do { } while (0)
 # define lockdep_posixtimer_enter()		do { } while (0)
@@ -181,17 +182,25 @@ do {						\
  * if !TRACE_IRQFLAGS.
  */
 #ifdef CONFIG_TRACE_IRQFLAGS
-#define local_irq_enable() \
-	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
-#define local_irq_disable() \
-	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
+
+#define local_irq_enable()				\
+	do {						\
+		trace_hardirqs_on();			\
+		raw_local_irq_enable();			\
+	} while (0)
+
+#define local_irq_disable()				\
+	do {						\
+		raw_local_irq_disable();		\
+		trace_hardirqs_off();			\
+	} while (0)
+
 #define local_irq_save(flags)				\
 	do {						\
 		raw_local_irq_save(flags);		\
 		trace_hardirqs_off();			\
 	} while (0)
 
-
 #define local_irq_restore(flags)			\
 	do {						\
 		if (raw_irqs_disabled_flags(flags)) {	\
@@ -214,10 +223,7 @@ do {						\
 
 #define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
 #define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
-#define local_irq_save(flags)					\
-	do {							\
-		raw_local_irq_save(flags);			\
-	} while (0)
+#define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
 #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
 #define safe_halt()		do { raw_safe_halt(); } while (0)
 
-- 
cgit 


From 044d0d6de9f50192f9697583504a382347ee95ca Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 23 Jul 2020 20:56:14 +1000
Subject: lockdep: Only trace IRQ edges

Problem:

  raw_local_irq_save(); // software state on
  local_irq_save(); // software state off
  ...
  local_irq_restore(); // software state still off, because we don't enable IRQs
  raw_local_irq_restore(); // software state still off, *whoopsie*

existing instances:

 - lock_acquire()
     raw_local_irq_save()
     __lock_acquire()
       arch_spin_lock(&graph_lock)
         pv_wait() := kvm_wait() (same or worse for Xen/HyperV)
           local_irq_save()

 - trace_clock_global()
     raw_local_irq_save()
     arch_spin_lock()
       pv_wait() := kvm_wait()
	 local_irq_save()

 - apic_retrigger_irq()
     raw_local_irq_save()
     apic->send_IPI() := default_send_IPI_single_phys()
       local_irq_save()

Possible solutions:

 A) make it work by enabling the tracing inside raw_*()
 B) make it work by keeping tracing disabled inside raw_*()
 C) call it broken and clean it up now

Now, given that the only reason to use the raw_* variant is because you don't
want tracing. Therefore A) seems like a weird option (although it can be done).
C) is tempting, but OTOH it ends up converting a _lot_ of code to raw just
because there is one raw user, this strips the validation/tracing off for all
the other users.

So we pick B) and declare any code that ends up doing:

	raw_local_irq_save()
	local_irq_save()
	lockdep_assert_irqs_disabled();

broken. AFAICT this problem has existed forever, the only reason it came
up is because commit: 859d069ee1dd ("lockdep: Prepare for NMI IRQ
state tracking") changed IRQ tracing vs lockdep recursion and the
first instance is fairly common, the other cases hardly ever happen.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[rewrote changelog]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Marco Elver <elver@google.com>
Link: https://lkml.kernel.org/r/20200723105615.1268126-1-npiggin@gmail.com
---
 include/linux/irqflags.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 00d553d77911..3ed4e8771b64 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -191,25 +191,24 @@ do {						\
 
 #define local_irq_disable()				\
 	do {						\
+		bool was_disabled = raw_irqs_disabled();\
 		raw_local_irq_disable();		\
-		trace_hardirqs_off();			\
+		if (!was_disabled)			\
+			trace_hardirqs_off();		\
 	} while (0)
 
 #define local_irq_save(flags)				\
 	do {						\
 		raw_local_irq_save(flags);		\
-		trace_hardirqs_off();			\
+		if (!raw_irqs_disabled_flags(flags))	\
+			trace_hardirqs_off();		\
 	} while (0)
 
 #define local_irq_restore(flags)			\
 	do {						\
-		if (raw_irqs_disabled_flags(flags)) {	\
-			raw_local_irq_restore(flags);	\
-			trace_hardirqs_off();		\
-		} else {				\
+		if (!raw_irqs_disabled_flags(flags))	\
 			trace_hardirqs_on();		\
-			raw_local_irq_restore(flags);	\
-		}					\
+		raw_local_irq_restore(flags);		\
 	} while (0)
 
 #define safe_halt()				\
-- 
cgit 


From 9d3004bf7ba32c89873bb8422671d52b4bb52ce1 Mon Sep 17 00:00:00 2001
From: Simon Leiner <simon@leiner.me>
Date: Tue, 25 Aug 2020 11:31:53 +0200
Subject: arm/xen: Add misuse warning to virt_to_gfn

As virt_to_gfn uses virt_to_phys, it will return invalid addresses when
used with vmalloc'd addresses. This patch introduces a warning, when
virt_to_gfn is used in this way.

Signed-off-by: Simon Leiner <simon@leiner.me>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Link: https://lore.kernel.org/r/20200825093153.35500-2-simon@leiner.me
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/arm/page.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index d7f6af50e200..39df751d0dc4 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -76,7 +76,11 @@ static inline unsigned long bfn_to_pfn(unsigned long bfn)
 #define bfn_to_local_pfn(bfn)	bfn_to_pfn(bfn)
 
 /* VIRT <-> GUEST conversion */
-#define virt_to_gfn(v)		(pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT))
+#define virt_to_gfn(v)                                                         \
+	({                                                                     \
+		WARN_ON_ONCE(!virt_addr_valid(v));                              \
+		pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT);                 \
+	})
 #define gfn_to_virt(m)		(__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT))
 
 /* Only used in PV code. But ARM guests are always HVM. */
-- 
cgit 


From 30b8e6b22fd0f7a56911e69c681e92532e72e3b6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 27 Aug 2020 10:54:16 +0530
Subject: cpufreq: Use WARN_ON_ONCE() for invalid relation

The relation can't be invalid here, so if it turns out to be invalid,
just WARN_ON_ONCE() and return 0.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 8f141d4c859c..a911e5d06845 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -956,8 +956,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 	case CPUFREQ_RELATION_C:
 		return cpufreq_table_find_index_c(policy, target_freq);
 	default:
-		pr_err("%s: Invalid relation: %d\n", __func__, relation);
-		return -EINVAL;
+		WARN_ON_ONCE(1);
+		return 0;
 	}
 }
 
-- 
cgit 


From ef91bb196b0db1013ef8705367bc2d7944ef696b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 28 Aug 2020 17:11:25 +1000
Subject: kernel.h: Silence sparse warning in lower_32_bits

I keep getting sparse warnings in crypto such as:

  CHECK   drivers/crypto/ccree/cc_hash.c
   drivers/crypto/ccree/cc_hash.c:49:9: warning: cast truncates bits from constant value (47b5481dbefa4fa4 becomes befa4fa4)
   drivers/crypto/ccree/cc_hash.c:49:26: warning: cast truncates bits from constant value (db0c2e0d64f98fa7 becomes 64f98fa7)
   [.. many more ..]

This patch removes the warning by adding a mask to keep sparse
happy.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 500def620d8f..c25b8e41c0ea 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -186,7 +186,7 @@
  * lower_32_bits - return bits 0-31 of a number
  * @n: the number we're accessing
  */
-#define lower_32_bits(n) ((u32)(n))
+#define lower_32_bits(n) ((u32)((n) & 0xffffffff))
 
 struct completion;
 struct pt_regs;
-- 
cgit