summaryrefslogtreecommitdiff
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 09:20:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-01 09:20:51 -0800
commitebaeabfa5ab711a9b69b686d58329e258fdae75f (patch)
treec4b0915701c65abed0bc3fe9f351f42daaaa9089 /fs/fs-writeback.c
parent9368f0f9419cde028a6e58331065900ff089bc36 (diff)
parent4952f35f0545f3b53dab8d5fd727c4827c2a2778 (diff)
Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull writeback updates from Christian Brauner: "Features: - Allow file systems to increase the minimum writeback chunk size. The relatively low minimal writeback size of 4MiB means that written back inodes on rotational media are switched a lot. Besides introducing additional seeks, this also can lead to extreme file fragmentation on zoned devices when a lot of files are cached relative to the available writeback bandwidth. This adds a superblock field that allows the file system to override the default size, and sets it to the zone size for zoned XFS. - Add logging for slow writeback when it exceeds sysctl_hung_task_timeout_secs. This helps identify tasks waiting for a long time and pinpoint potential issues. Recording the starting jiffies is also useful when debugging a crashed vmcore. - Wake up waiting tasks when finishing the writeback of a chunk Cleanups: - filemap_* writeback interface cleanups. Adding filemap_fdatawrite_wbc ended up being a mistake, as all but the original btrfs caller should be using better high level interfaces instead. This series removes all these low-level interfaces, switches btrfs to a more specific interface, and cleans up other too low-level interfaces. With this the writeback_control that is passed to the writeback code is only initialized in three places. - Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and filemap_fdatawrite_wbc - Add filemap_flush_nr helper for btrfs - Push struct writeback_control into start_delalloc_inodes in btrfs - Rename filemap_fdatawrite_range_kick to filemap_flush_range - Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm - Make wbc_to_tag() inline and use it in fs" * tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs: Make wbc_to_tag() inline and use it in fs. xfs: set s_min_writeback_pages for zoned file systems writeback: allow the file system to override MIN_WRITEBACK_PAGES writeback: cleanup writeback_chunk_size mm: rename filemap_fdatawrite_range_kick to filemap_flush_range mm: remove __filemap_fdatawrite_range mm: remove filemap_fdatawrite_wbc mm: remove __filemap_fdatawrite mm,btrfs: add a filemap_flush_nr helper btrfs: push struct writeback_control into start_delalloc_inodes btrfs: use the local tmp_inode variable in start_delalloc_inodes ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers 9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs) writeback: Wake up waiting tasks when finishing the writeback of a chunk.
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c55
1 files changed, 34 insertions, 21 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 917a450c503b..6800886c4d10 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -14,6 +14,7 @@
* Additions for address_space-based writeback
*/
+#include <linux/sched/sysctl.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -32,11 +33,6 @@
#include "internal.h"
/*
- * 4MB minimal write chunk size
- */
-#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
-
-/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
struct wb_writeback_work {
@@ -200,6 +196,19 @@ static void wb_queue_work(struct bdi_writeback *wb,
spin_unlock_irq(&wb->work_lock);
}
+static bool wb_wait_for_completion_cb(struct wb_completion *done)
+{
+ unsigned long waited_secs = (jiffies - done->wait_start) / HZ;
+
+ done->progress_stamp = jiffies;
+ if (waited_secs > sysctl_hung_task_timeout_secs)
+ pr_info("INFO: The task %s:%d has been waiting for writeback "
+ "completion for more than %lu seconds.",
+ current->comm, current->pid, waited_secs);
+
+ return !atomic_read(&done->cnt);
+}
+
/**
* wb_wait_for_completion - wait for completion of bdi_writeback_works
* @done: target wb_completion
@@ -212,8 +221,9 @@ static void wb_queue_work(struct bdi_writeback *wb,
*/
void wb_wait_for_completion(struct wb_completion *done)
{
+ done->wait_start = jiffies;
atomic_dec(&done->cnt); /* put down the initial count */
- wait_event(*done->waitq, !atomic_read(&done->cnt));
+ wait_event(*done->waitq, wb_wait_for_completion_cb(done));
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -808,9 +818,9 @@ static void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
* @wbc: writeback_control of interest
* @inode: target inode
*
- * This function is to be used by __filemap_fdatawrite_range(), which is an
- * alternative entry point into writeback code, and first ensures @inode is
- * associated with a bdi_writeback and attaches it to @wbc.
+ * This function is to be used by filemap_writeback(), which is an alternative
+ * entry point into writeback code, and first ensures @inode is associated with
+ * a bdi_writeback and attaches it to @wbc.
*/
void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
struct inode *inode)
@@ -1882,8 +1892,8 @@ out:
return ret;
}
-static long writeback_chunk_size(struct bdi_writeback *wb,
- struct wb_writeback_work *work)
+static long writeback_chunk_size(struct super_block *sb,
+ struct bdi_writeback *wb, struct wb_writeback_work *work)
{
long pages;
@@ -1901,16 +1911,13 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
* (maybe slowly) sync all tagged pages
*/
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
- pages = LONG_MAX;
- else {
- pages = min(wb->avg_write_bandwidth / 2,
- global_wb_domain.dirty_limit / DIRTY_SCOPE);
- pages = min(pages, work->nr_pages);
- pages = round_down(pages + MIN_WRITEBACK_PAGES,
- MIN_WRITEBACK_PAGES);
- }
+ return LONG_MAX;
- return pages;
+ pages = min(wb->avg_write_bandwidth / 2,
+ global_wb_domain.dirty_limit / DIRTY_SCOPE);
+ pages = min(pages, work->nr_pages);
+ return round_down(pages + sb->s_min_writeback_pages,
+ sb->s_min_writeback_pages);
}
/*
@@ -2012,7 +2019,7 @@ static long writeback_sb_inodes(struct super_block *sb,
inode_state_set(inode, I_SYNC);
wbc_attach_and_unlock_inode(&wbc, inode);
- write_chunk = writeback_chunk_size(wb, work);
+ write_chunk = writeback_chunk_size(inode->i_sb, wb, work);
wbc.nr_to_write = write_chunk;
wbc.pages_skipped = 0;
@@ -2022,6 +2029,12 @@ static long writeback_sb_inodes(struct super_block *sb,
*/
__writeback_single_inode(inode, &wbc);
+ /* Report progress to inform the hung task detector of the progress. */
+ if (work->done && work->done->progress_stamp &&
+ (jiffies - work->done->progress_stamp) > HZ *
+ sysctl_hung_task_timeout_secs / 2)
+ wake_up_all(work->done->waitq);
+
wbc_detach_inode(&wbc);
work->nr_pages -= write_chunk - wbc.nr_to_write;
wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;