From d217b5cea488c0f644c189f91b636aeefa12deb9 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 4 Nov 2024 11:45:42 +0800 Subject: f2fs: add parameter @len to f2fs_invalidate_internal_cache() New function can process some consecutive blocks at a time. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eade36c5ef13..86e547f008f9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2537,7 +2537,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr); + f2fs_invalidate_internal_cache(sbi, addr, 1); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); @@ -3857,7 +3857,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) goto out; } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr); + f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); @@ -4049,7 +4049,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); -- cgit From 66baee2b886d72ab6be11a08d4c7897f9612e25b Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:41 +0800 Subject: f2fs: introduce update_sit_entry_for_release/alloc() No logical changes, just for cleanliness. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 162 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 93 insertions(+), 69 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 86e547f008f9..bc761d9b889a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2426,16 +2426,103 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) +{ + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif + + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(!exist)) { + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } + } + + if (f2fs_block_unit_discard(sbi) && + f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + return del; +} + +static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) { - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; + } + + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + return del; +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + segno = GET_SEGNO(sbi, blkaddr); if (segno == NULL_SEGNO) return; @@ -2451,73 +2538,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(exist)) { - f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks--; - del = 0; - } - - if (f2fs_block_unit_discard(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) - sbi->discard_blks--; - - /* - * SSR should never reuse block which is checkpointed - * or newly invalidated. - */ - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { - if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks++; - } + del = update_sit_entry_for_alloc(sbi, se, blkaddr, offset, del); } else { - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks++; - del = 0; - } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - /* - * If checkpoints are off, we must not reuse data that - * was used in the previous checkpoint. If it was used - * before, we must track that to know how much space we - * really have. - */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { - spin_lock(&sbi->stat_lock); - sbi->unusable_block_count++; - spin_unlock(&sbi->stat_lock); - } - } - - if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) - sbi->discard_blks++; + del = update_sit_entry_for_release(sbi, se, blkaddr, offset, del); } - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks += del; __mark_sit_entry_dirty(sbi, segno); -- cgit From 81ffbd224e5f926bf8df01d6107db9c8779f7d57 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:42 +0800 Subject: f2fs: update_sit_entry_for_release() supports consecutive blocks. This function can process some consecutive blocks at a time. When using update_sit_entry() to release consecutive blocks, ensure that the consecutive blocks belong to the same segment. Because after update_sit_entry_for_realese(), @segno is still in use in update_sit_entry(). Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 75 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 30 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bc761d9b889a..67d2859831e4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2426,6 +2426,10 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } +/* + * NOTE: when updating multiple blocks at the same time, please ensure + * that the consecutive input blocks belong to the same segment. + */ static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, block_t blkaddr, unsigned int offset, int del) { @@ -2433,42 +2437,48 @@ static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_ent #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + int i; + int del_count = -del; + + f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); + for (i = 0; i < del_count; i++) { + exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } + mir_exist = f2fs_test_and_clear_bit(offset + i, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr + i, exist); + f2fs_bug_on(sbi, 1); + } #endif - if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks++; - del = 0; - } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - /* - * If checkpoints are off, we must not reuse data that - * was used in the previous checkpoint. If it was used - * before, we must track that to know how much space we - * really have. - */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { - spin_lock(&sbi->stat_lock); - sbi->unusable_block_count++; - spin_unlock(&sbi->stat_lock); + if (unlikely(!exist)) { + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); + f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del += 1; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } } - } - if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) - sbi->discard_blks++; + if (f2fs_block_unit_discard(sbi) && + f2fs_test_and_clear_bit(offset + i, se->discard_map)) + sbi->discard_blks++; - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks += del; + if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) + se->ckpt_valid_blocks -= 1; + } return del; } @@ -2517,6 +2527,11 @@ static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry return del; } +/* + * If releasing blocks, this function supports updating multiple consecutive blocks + * at one time, but please note that these consecutive blocks need to belong to the + * same segment. + */ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) { struct seg_entry *se; -- cgit From e53c568f4603e997426712146dce0bc194c1db12 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:43 +0800 Subject: f2fs: add parameter @len to f2fs_invalidate_blocks() New function can process some consecutive blocks at a time. Function f2fs_invalidate_blocks()->down_write() and up_write() are very time-consuming, so if f2fs_invalidate_blocks() can process consecutive blocks at one time, it will save a lot of time. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 67d2859831e4..813254dcc00e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -245,7 +245,7 @@ retry: if (!__is_valid_data_blkaddr(new_addr)) { if (new_addr == NULL_ADDR) dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1); f2fs_update_data_blkaddr(&dn, new_addr); } else { f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -2567,25 +2567,43 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); + block_t addr_start = addr, addr_end = addr + len - 1; + unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; + unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr, 1); + f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); - update_segment_mtime(sbi, addr, 0); - update_sit_entry(sbi, addr, -1); + if (seg_num == 1) + cnt = len; + else + cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); + do { + update_segment_mtime(sbi, addr_start, 0); + update_sit_entry(sbi, addr_start, -cnt); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + /* update @addr_start and @cnt and @segno */ + addr_start = START_BLOCK(sbi, ++segno); + if (++i == seg_num) + cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; + else + cnt = max_blocks; + } while (i <= seg_num); up_write(&sit_i->sentry_lock); } -- cgit From 207764e5d6f19de483d7f0e43243d1a1fce4fb32 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 Jan 2025 19:19:40 +0800 Subject: f2fs: fix to avoid return invalid mtime from f2fs_get_section_mtime() syzbot reported a f2fs bug as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/gc.c:373! CPU: 0 UID: 0 PID: 5316 Comm: syz.0.0 Not tainted 6.13.0-rc3-syzkaller-00044-gaef25be35d23 #0 RIP: 0010:get_cb_cost fs/f2fs/gc.c:373 [inline] RIP: 0010:get_gc_cost fs/f2fs/gc.c:406 [inline] RIP: 0010:f2fs_get_victim+0x68b1/0x6aa0 fs/f2fs/gc.c:912 Call Trace: __get_victim fs/f2fs/gc.c:1707 [inline] f2fs_gc+0xc89/0x2f60 fs/f2fs/gc.c:1915 f2fs_ioc_gc fs/f2fs/file.c:2624 [inline] __f2fs_ioctl+0x4cc9/0xb8b0 fs/f2fs/file.c:4482 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f w/ below testcase, it can reproduce directly: - dd if=/dev/zero of=/tmp/file bs=1M count=64 - mkfs.f2fs /tmp/file - mount -t f2fs -o loop,mode=fragment:block /tmp/file /mnt/f2fs - echo 0 > /sys/fs/f2fs/loop0/min_ssr_sections - dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=5 - umount /mnt/f2fs - for((i=4096;i<16384;i+=512)) do inject.f2fs --sit 0 --blk $i --mb mtime --val -1 /tmp/file; done - mount -o loop /tmp/file /mnt/f2fs - f2fs_io gc 0 /mnt/f2fs/file static unsigned int get_cb_cost() { ... mtime = f2fs_get_section_mtime(sbi, segno); f2fs_bug_on(sbi, mtime == INVALID_MTIME); ... } The root cause is: mtime in f2fs_sit_entry can be fuzzed to INVALID_MTIME, then it will trigger BUG_ON in get_cb_cost() during GC. Let's change behavior of f2fs_get_section_mtime() as below for fix: - return INVALID_MTIME only if total valid blocks is zero. - return INVALID_MTIME - 1 if average mtime calculated is INVALID_MTIME. Fixes: b19ee7272208 ("f2fs: introduce f2fs_get_section_mtime") Reported-by: syzbot+b9972806adbe20a910eb@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/6768c82e.050a0220.226966.0035.GAE@google.com Cc: liuderong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 813254dcc00e..b3a82a8cdc5f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5549,8 +5549,10 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, secno = GET_SEC_FROM_SEG(sbi, segno); start = GET_SEG_FROM_SEC(sbi, secno); - if (!__is_large_section(sbi)) - return get_seg_entry(sbi, start + i)->mtime; + if (!__is_large_section(sbi)) { + mtime = get_seg_entry(sbi, start + i)->mtime; + goto out; + } for (i = 0; i < usable_segs_per_sec; i++) { /* for large section, only check the mtime of valid segments */ @@ -5563,7 +5565,11 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, if (total_valid_blocks == 0) return INVALID_MTIME; - return div_u64(mtime, total_valid_blocks); + mtime = div_u64(mtime, total_valid_blocks); +out: + if (unlikely(mtime == INVALID_MTIME)) + mtime -= 1; + return mtime; } /* -- cgit From edf3c0860060f8171d60dc5a6c28cb6702559f32 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Tue, 21 Jan 2025 10:15:41 +0800 Subject: f2fs: fix to avoid changing 'check only' behaior of recovery The following two 'check only recovery' processes are very dependent on the return value of f2fs_recover_fsync_data, especially when the return value is greater than 0. 1. when device has readonly mode, shown as commit 23738e74472f ("f2fs: fix to restrict mount condition on readonly block device") 2. mount optiont NORECOVERY or DISABLE_ROLL_FORWARD is set, shown as commit 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount") However, commit c426d99127b1 ("f2fs: Check write pointer consistency of open zones") will change the return value unexpectedly, thereby changing the caller's behavior This patch let the f2fs_recover_fsync_data return correct value,and not do f2fs_check_and_fix_write_pointer when the device is read-only. Fixes: c426d99127b1 ("f2fs: Check write pointer consistency of open zones") Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b3a82a8cdc5f..dc1b47f9269a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5462,7 +5462,8 @@ int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { int ret; - if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) || + f2fs_hw_is_readonly(sbi)) return 0; f2fs_notice(sbi, "Checking entire write pointers"); -- cgit From 03511e936916873bf880e6678c98d5fb59c19742 Mon Sep 17 00:00:00 2001 From: Jianan Huang Date: Fri, 24 Jan 2025 13:57:51 +0800 Subject: f2fs: fix inconsistent dirty state of atomic file When testing the atomic write fix patches, the f2fs_bug_on was triggered as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/inode.c:935! Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 3 UID: 0 PID: 257 Comm: bash Not tainted 6.13.0-rc1-00033-gc283a70d3497 #5 RIP: 0010:f2fs_evict_inode+0x50f/0x520 Call Trace: ? __die_body+0x65/0xb0 ? die+0x9f/0xc0 ? do_trap+0xa1/0x170 ? f2fs_evict_inode+0x50f/0x520 ? f2fs_evict_inode+0x50f/0x520 ? handle_invalid_op+0x65/0x80 ? f2fs_evict_inode+0x50f/0x520 ? exc_invalid_op+0x39/0x50 ? asm_exc_invalid_op+0x1a/0x20 ? __pfx_f2fs_get_dquots+0x10/0x10 ? f2fs_evict_inode+0x50f/0x520 ? f2fs_evict_inode+0x2e5/0x520 evict+0x186/0x2f0 prune_icache_sb+0x75/0xb0 super_cache_scan+0x1a8/0x200 do_shrink_slab+0x163/0x320 shrink_slab+0x2fc/0x470 drop_slab+0x82/0xf0 drop_caches_sysctl_handler+0x4e/0xb0 proc_sys_call_handler+0x183/0x280 vfs_write+0x36d/0x450 ksys_write+0x68/0xd0 do_syscall_64+0xc8/0x1a0 ? arch_exit_to_user_mode_prepare+0x11/0x60 ? irqentry_exit_to_user_mode+0x7e/0xa0 The root cause is: f2fs uses FI_ATOMIC_DIRTIED to indicate dirty atomic files during commit. If the inode is dirtied during commit, such as by f2fs_i_pino_write, the vfs inode keeps clean and the f2fs inode is set to FI_DIRTY_INODE. The FI_DIRTY_INODE flag cann't be cleared by write_inode later due to the clean vfs inode. Finally, f2fs_bug_on is triggered due to this inconsistent state when evict. To reproduce this situation: - fd = open("/mnt/test.db", O_WRONLY) - ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE) - mv /mnt/test.db /mnt/test1.db - ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE) - echo 3 > /proc/sys/vm/drop_caches To fix this problem, clear FI_DIRTY_INODE after commit, then f2fs_mark_inode_dirty_sync will ensure a consistent dirty state. Fixes: fccaa81de87e ("f2fs: prevent atomic file from being dirtied before commit") Signed-off-by: Yunlei He Signed-off-by: Jianan Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dc1b47f9269a..c282e8a0a2ec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -201,6 +201,12 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_FILE); if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* + * The vfs inode keeps clean during commit, but the f2fs inode + * doesn't. So clear the dirty state after commit and let + * f2fs_mark_inode_dirty_sync ensure a consistent dirty state. + */ + f2fs_inode_synced(inode); f2fs_mark_inode_dirty_sync(inode, true); } stat_dec_atomic_inode(inode); -- cgit