From 6de8687ccdefed40d617492f4e1b3962eb577b6b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:12 -0500 Subject: f2fs: remove allow_outplace_dio() We can just check f2fs_lfs_mode() directly. The block_unaligned_IO() check is redundant because in LFS mode, f2fs doesn't do direct I/O writes that aren't block-aligned (due to f2fs_force_buffered_io() returning true in this case, triggering the fallback to buffered I/O). Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6afd4562335f..b1cb5b50faac 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4292,7 +4292,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * back to buffered IO. */ if (!f2fs_force_buffered_io(inode, iocb, from) && - allow_outplace_dio(inode, iocb, from)) + f2fs_lfs_mode(F2FS_I_SB(inode))) goto write; } preallocated = true; -- cgit From 2787991516468bfafafb9bf2b45a848e6b202e7c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Jul 2021 09:03:29 +0800 Subject: f2fs: fix to force keeping write barrier for strict fsync mode [1] https://www.mail-archive.com/linux-f2fs-devel@lists.sourceforge.net/msg15126.html As [1] reported, if lower device doesn't support write barrier, in below case: - write page #0; persist - overwrite page #0 - fsync - write data page #0 OPU into device's cache - write inode page into device's cache - issue flush If SPO is triggered during flush command, inode page can be persisted before data page #0, so that after recovery, inode page can be recovered with new physical block address of data page #0, however there may contains dummy data in new physical block address. Then what user will see is: after overwrite & fsync + SPO, old data in file was corrupted, if any user do care about such case, we can suggest user to use STRICT fsync mode, in this mode, we will force to use atomic write sematics to keep write order in between data/node and last node, so that it avoids potential data corruption during fsync(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b1cb5b50faac..e931782abdab 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -301,6 +301,18 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, f2fs_exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; + } else { + /* + * for OPU case, during fsync(), node can be persisted before + * data when lower device doesn't support write barrier, result + * in data corruption after SPO. + * So for strict fsync mode, force to use atomic write sematics + * to keep write order in between data/node and last node to + * avoid potential data corruption. + */ + if (F2FS_OPTION(sbi).fsync_mode == + FSYNC_MODE_STRICT && !atomic) + atomic = true; } go_write: /* -- cgit From 0f6b56ec958d49e2b3dc955cdac6b62702c04b72 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 2 Aug 2021 21:22:45 -0700 Subject: f2fs: add sysfs node to control ra_pages for fadvise seq file fadvise() allows the user to expand the readahead window to double with POSIX_FADV_SEQUENTIAL, now. But, in some use cases, it is not that sufficient and we need to meet the need in a restricted way. We can control the multiplier value of bdi device readahead between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e931782abdab..7d8ee60f6c1f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -4344,6 +4345,34 @@ out: return ret; } +static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, + int advice) +{ + struct inode *inode; + struct address_space *mapping; + struct backing_dev_info *bdi; + + if (advice == POSIX_FADV_SEQUENTIAL) { + inode = file_inode(filp); + if (S_ISFIFO(inode->i_mode)) + return -ESPIPE; + + mapping = filp->f_mapping; + if (!mapping || len < 0) + return -EINVAL; + + bdi = inode_to_bdi(mapping->host); + filp->f_ra.ra_pages = bdi->ra_pages * + F2FS_I_SB(inode)->seq_file_ra_mul; + spin_lock(&filp->f_lock); + filp->f_mode &= ~FMODE_RANDOM; + spin_unlock(&filp->f_lock); + return 0; + } + + return generic_fadvise(filp, offset, len, advice); +} + #ifdef CONFIG_COMPAT struct compat_f2fs_gc_range { u32 sync; @@ -4472,4 +4501,5 @@ const struct file_operations f2fs_file_operations = { #endif .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, + .fadvise = f2fs_file_fadvise, }; -- cgit From 4a4fc043f594d39edc976a3a3dce7c40ebb86f3f Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Mon, 9 Aug 2021 10:21:04 +0800 Subject: f2fs: compress: allow write compress released file after truncate to zero For compressed file, after release compress blocks, don't allow write direct, but we should allow write direct after truncate to zero. Reviewed-by: Chao Yu Signed-off-by: Fengnan Chang Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d8ee60f6c1f..d4fc5e0d2ffe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -753,6 +753,14 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) return err; #ifdef CONFIG_F2FS_FS_COMPRESSION + /* + * For compressed file, after release compress blocks, don't allow write + * direct, but we should allow write direct after truncate to zero. + */ + if (f2fs_compressed_file(inode) && !free_from + && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + clear_inode_flag(inode, FI_COMPRESS_RELEASED); + if (from != free_from) { err = f2fs_truncate_partial_cluster(inode, from, lock); if (err) -- cgit From 521187439abfb3e1c946796dc2187c443e5457ab Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 19 Aug 2021 20:52:28 -0700 Subject: f2fs: separate out iostat feature Added F2FS_IOSTAT config option to support getting IO statistics through sysfs and printing out periodic IO statistics tracepoint events and moved I/O statistics related codes into separate files for better maintenance. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu [Jaegeuk Kim: set default=y] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d4fc5e0d2ffe..ab4ea2ddcc8b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -31,6 +31,7 @@ #include "xattr.h" #include "acl.h" #include "gc.h" +#include "iostat.h" #include #include -- cgit From d75da8c8a4c5c761936e4a51403f5f21e3aba935 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Aug 2021 08:11:38 +0800 Subject: f2fs: adjust unlock order for cleanup This patch adjusts unlock order of .i_mmap_sem and .i_gc_rwsem for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ab4ea2ddcc8b..cc2080866c54 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3497,8 +3497,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) released_blocks += ret; } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); out: inode_unlock(inode); @@ -3650,8 +3650,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) reserved_blocks += ret; } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (ret >= 0) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); -- cgit From c8dc3047c48540183744f959412d44b08c5435e1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Aug 2021 19:34:19 +0800 Subject: f2fs: fix to unmap pages from userspace process in punch_hole() We need to unmap pages from userspace process before removing pagecache in punch_hole() like we did in f2fs_setattr(). Similar change: commit 5e44f8c374dc ("ext4: hole-punch use truncate_pagecache_range") Fixes: fbfa2cc58d53 ("f2fs: add file operations") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cc2080866c54..7fca3ac75e9f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1107,7 +1107,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (pg_start < pg_end) { - struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1119,8 +1118,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_inode_pages_range(mapping, blk_start, - blk_end - 1); + truncate_pagecache_range(inode, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = f2fs_truncate_hole(inode, pg_start, pg_end); -- cgit From dddd3d65293a52c2c3850c19b1e5115712e534d8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Aug 2021 14:00:57 -0700 Subject: f2fs: guarantee to write dirty data when enabling checkpoint back We must flush all the dirty data when enabling checkpoint back. Let's guarantee that first by adding a retry logic on sync_inodes_sb(). In addition to that, this patch adds to flush data in fsync when checkpoint is disabled, which can mitigate the sync_inodes_sb() failures in advance. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/file.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7fca3ac75e9f..f30b841d4e5a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -263,8 +263,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, }; unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb) || - is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + if (unlikely(f2fs_readonly(inode->i_sb))) return 0; trace_f2fs_sync_file_enter(inode); @@ -278,7 +277,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, ret = file_write_and_wait_range(file, start, end); clear_inode_flag(inode, FI_NEED_IPU); - if (ret) { + if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } -- cgit