From 1888635532fbbd6be4a4368621085c3a197279f8 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Tue, 30 Sep 2025 16:53:15 +0800 Subject: writeback: Wake up waiting tasks when finishing the writeback of a chunk. Writing back a large number of pages can take a lots of time. This issue is exacerbated when the underlying device is slow or subject to block layer rate limiting, which in turn triggers unexpected hung task warnings. We can trigger a wake-up once a chunk has been written back and the waiting time for writeback exceeds half of sysctl_hung_task_timeout_secs. This action allows the hung task detector to be aware of the writeback progress, thereby eliminating these unexpected hung task warnings. This patch has passed the xfstests 'check -g quick' test based on ext4, with no additional failures introduced. Signed-off-by: Julian Sun Reviewed-by: Jan Kara Suggested-by: Peter Zijlstra Signed-off-by: Christian Brauner --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c5c9d89c73ed..c8aa749790b1 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -63,6 +63,7 @@ enum wb_reason { struct wb_completion { atomic_t cnt; wait_queue_head_t *waitq; + unsigned long progress_stamp; /* The jiffies when slow progress is detected */ }; #define __WB_COMPLETION_INIT(_waitq) \ -- cgit From d6e6215907640801b1f407dc9e871b19ca5a3805 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Tue, 30 Sep 2025 15:18:29 +0800 Subject: writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs) When a writeback work lasts for sysctl_hung_task_timeout_secs, we want to identify that there are tasks waiting for a long time-this helps us pinpoint potential issues. Additionally, recording the starting jiffies is useful when debugging a crashed vmcore. Signed-off-by: Julian Sun Signed-off-by: Christian Brauner --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c8aa749790b1..610ef62b6a32 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -64,6 +64,7 @@ struct wb_completion { atomic_t cnt; wait_queue_head_t *waitq; unsigned long progress_stamp; /* The jiffies when slow progress is detected */ + unsigned long wait_start; /* The jiffies when waiting for the writeback work to finish */ }; #define __WB_COMPLETION_INIT(_waitq) \ -- cgit From 7fabcb7fbabbcddd9dc42dbe4c92d18ce3e54283 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2025 10:04:17 +0200 Subject: mm,btrfs: add a filemap_flush_nr helper Abstract out the btrfs-specific behavior of kicking off I/O on a number of pages on an address_space into a well-defined helper. Note: there is no kerneldoc comment for the new function because it is not part of the public API. Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20251024080431.324236-7-hch@lst.de Reviewed-by: David Hildenbrand Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 09b581c1d878..cebdf160d3dd 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -38,6 +38,7 @@ int filemap_invalidate_pages(struct address_space *mapping, int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); int filemap_flush(struct address_space *); +int filemap_flush_nr(struct address_space *mapping, long *nr_to_write); int filemap_fdatawait_keep_errors(struct address_space *mapping); int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); int filemap_fdatawait_range_keep_errors(struct address_space *mapping, -- cgit From 1bcb413d0cd80efb386751910036a93147fd8dbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2025 10:04:19 +0200 Subject: mm: remove filemap_fdatawrite_wbc Replace filemap_fdatawrite_wbc, which exposes a writeback_control to the callers with a filemap_writeback helper that takes all the possible arguments and declares the writeback_control itself. Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20251024080431.324236-9-hch@lst.de Reviewed-by: David Hildenbrand Reviewed-by: Jan Kara Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cebdf160d3dd..678d8ae23d01 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -60,8 +60,6 @@ int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); int filemap_check_errors(struct address_space *mapping); void __filemap_set_wb_err(struct address_space *mapping, int err); -int filemap_fdatawrite_wbc(struct address_space *mapping, - struct writeback_control *wbc); int kiocb_write_and_wait(struct kiocb *iocb, size_t count); static inline int filemap_write_and_wait(struct address_space *mapping) -- cgit From 45cbce5b8877f339b72548f60aa97634044c255c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2025 10:04:20 +0200 Subject: mm: remove __filemap_fdatawrite_range Use filemap_fdatawrite_range and filemap_fdatawrite_range_kick instead of the low-level __filemap_fdatawrite_range that requires the caller to know the internals of the writeback_control structure and remove __filemap_fdatawrite_range now that it is trivial and only two callers would be left. Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20251024080431.324236-10-hch@lst.de Reviewed-by: Jan Kara Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 678d8ae23d01..d0a7dd43c835 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -54,8 +54,6 @@ static inline int filemap_fdatawait(struct address_space *mapping) bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); -int __filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end, int sync_mode); int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); int filemap_check_errors(struct address_space *mapping); -- cgit From c28d67b33cbf6da2043ee7517f1aa4cbf92dbbba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Oct 2025 10:04:21 +0200 Subject: mm: rename filemap_fdatawrite_range_kick to filemap_flush_range Rename filemap_fdatawrite_range_kick to filemap_flush_range because it is the ranged version of filemap_flush. Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20251024080431.324236-11-hch@lst.de Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444..a5dbfa20f8d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3014,7 +3014,7 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); -int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, +int filemap_flush_range(struct address_space *mapping, loff_t start, loff_t end); static inline int file_write_and_wait(struct file *file) @@ -3051,8 +3051,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) } else if (iocb->ki_flags & IOCB_DONTCACHE) { struct address_space *mapping = iocb->ki_filp->f_mapping; - filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, - iocb->ki_pos - 1); + filemap_flush_range(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; -- cgit From 90db4d4441f58d433ecf74f7e3bd17e0a553c20c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Oct 2025 05:45:48 +0200 Subject: writeback: allow the file system to override MIN_WRITEBACK_PAGES The relatively low minimal writeback size of 4MiB means that written back inodes on rotational media are switched a lot. Besides introducing additional seeks, this also can lead to extreme file fragmentation on zoned devices when a lot of files are cached relative to the available writeback bandwidth. Add a superblock field that allows the file system to override the default size. Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20251017034611.651385-3-hch@lst.de Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 + include/linux/writeback.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a5dbfa20f8d7..6bf369095d2e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1583,6 +1583,7 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ + long s_min_writeback_pages; } __randomize_layout; static inline struct user_namespace *i_user_ns(const struct inode *inode) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 22dd4adc5667..49e1dd96f43e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -374,4 +374,9 @@ bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); +/* + * 4MB minimal write chunk size + */ +#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) + #endif /* WRITEBACK_H */ -- cgit From 4952f35f0545f3b53dab8d5fd727c4827c2a2778 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Mon, 29 Sep 2025 19:13:49 +0800 Subject: fs: Make wbc_to_tag() inline and use it in fs. The logic in wbc_to_tag() is widely used in file systems, so modify this function to be inline and use it in file systems. This patch has only passed compilation tests, but it should be fine. Signed-off-by: Julian Sun Reviewed-by: Qu Wenruo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/writeback.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 49e1dd96f43e..2a81816f7507 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -196,6 +196,13 @@ static inline void wait_on_inode(struct inode *inode) !(READ_ONCE(inode->i_state) & I_NEW)); } +static inline xa_mark_t wbc_to_tag(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) + return PAGECACHE_TAG_TOWRITE; + return PAGECACHE_TAG_DIRTY; +} + #ifdef CONFIG_CGROUP_WRITEBACK #include -- cgit