From 3373a3461aa15b7f9a871fa4cb2c9ef21ac20b47 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 18 Jun 2020 12:21:38 +0800 Subject: block: make function 'kill_bdev' static kill_bdev does not have any external user, so make it static. Signed-off-by: Zheng Bin Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..3f881a892ea7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2592,7 +2592,6 @@ extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern void emergency_thaw_bdev(struct super_block *sb); @@ -2608,7 +2607,6 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {} static inline struct super_block *freeze_bdev(struct block_device *sb) -- cgit From dd3e6d5039de1cbff4e20e2b34390ff44cdb182f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 22 May 2020 09:12:09 -0600 Subject: mm: add support for async page locking Normally waiting for a page to become unlocked, or locking the page, requires waiting for IO to complete. Add support for lock_page_async() and wait_on_page_locked_async(), which are callback based instead. This allows a caller to get notified when a page becomes unlocked, rather than wait for it. We add a new iocb field, ki_waitq, to pass in the necessary data for this to happen. We can unionize this with ki_cookie, since that is only used for polled IO. Polled IO can never co-exist with async callbacks, as it is (by definition) polled completions. struct wait_page_key is made public, and we define struct wait_page_async as the interface between the caller and the core. Acked-by: Johannes Weiner Signed-off-by: Jens Axboe --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..2a5cf6080e68 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +/* iocb->ki_waitq is valid */ +#define IOCB_WAITQ (1 << 8) struct kiocb { struct file *ki_filp; @@ -328,7 +330,10 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - unsigned int ki_cookie; /* for ->iopoll */ + union { + unsigned int ki_cookie; /* for ->iopoll */ + struct wait_page_queue *ki_waitq; /* for async buffered IO */ + }; randomized_struct_fields_end }; -- cgit From c2a25ec0f1005dde004cd671484f578a9c8ca7de Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 22 May 2020 09:12:51 -0600 Subject: fs: add FMODE_BUF_RASYNC If set, this indicates that the file system supports IOCB_WAITQ for buffered reads. Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a5cf6080e68..4090320360f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File supports async buffered reads */ +#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are -- cgit From b818f09e46f9f6a66471f81bf83094ff0a477d0c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:35 +0200 Subject: tty/sysrq: emergency_thaw_all does not depend on CONFIG_BLOCK We can also thaw non-block file systems. Remove the CONFIG_BLOCK in sysrq.c after making the prototype available unconditionally. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..7f40dbafbf6d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2593,7 +2593,6 @@ extern void invalidate_bdev(struct block_device *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); -extern void emergency_thaw_all(void); extern void emergency_thaw_bdev(struct super_block *sb); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); @@ -2633,6 +2632,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) return false; } #endif +void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -- cgit From 764b23bd9af8ff8ecc664816e39d4791b6a72bfd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:36 +0200 Subject: block: mark bd_finish_claiming static Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f40dbafbf6d..b1c960e9b84e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2646,8 +2646,6 @@ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern struct block_device *bd_start_claiming(struct block_device *bdev, void *holder); -extern void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); extern void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); -- cgit From 7dbac5baa887facf80373825b8f66c626703621f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:37 +0200 Subject: fs: remove an unused block_device_operations forward declaration Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index b1c960e9b84e..0d282c853691 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1774,8 +1774,6 @@ struct dir_context { loff_t pos; }; -struct block_device_operations; - /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl * fields in struct file_operations. */ -- cgit From 4e24566a134ea167441a1ffa3d439a27cf400880 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:38 +0200 Subject: fs: remove the HAVE_UNLOCKED_IOCTL and HAVE_COMPAT_IOCTL defines These are not defined anywhere, and contrary to the comments we really do not care about out of tree code at all. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0d282c853691..224edcc5b56e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1774,12 +1774,6 @@ struct dir_context { loff_t pos; }; -/* These macros are for out of kernel modules to test that - * the kernel supports the unlocked_ioctl and compat_ioctl - * fields in struct file_operations. */ -#define HAVE_COMPAT_IOCTL 1 -#define HAVE_UNLOCKED_IOCTL 1 - /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. -- cgit From 75362a1792d16a61f0277d3610dea2f50a16bf3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:39 +0200 Subject: fs: remove the mount_bdev and kill_block_super stubs No one calls these functions without CONFIG_BLOCK, so don't bother stubbing them out. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 224edcc5b56e..9ee09e2b5a97 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2256,18 +2256,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -#ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -#else -static inline struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - return ERR_PTR(-ENODEV); -} -#endif extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2276,14 +2267,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); -#ifdef CONFIG_BLOCK void kill_block_super(struct super_block *sb); -#else -static inline void kill_block_super(struct super_block *sb) -{ - BUG(); -} -#endif void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); -- cgit From dd0dca223e091bbacdd3c7ce9cf06b373da59816 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:40 +0200 Subject: block: simplify sb_is_blkdev_sb Just use IS_ENABLED instead of providing a stub for !CONFIG_BLOCK. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ee09e2b5a97..7f3ae38335d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2557,6 +2557,12 @@ extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) +extern struct super_block *blockdev_superblock; +static inline bool sb_is_blkdev_sb(struct super_block *sb) +{ + return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; +} + #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); @@ -2572,13 +2578,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_bdev(struct super_block *sb); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); - -extern struct super_block *blockdev_superblock; - -static inline bool sb_is_blkdev_sb(struct super_block *sb) -{ - return sb == blockdev_superblock; -} #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -2602,11 +2601,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb) static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) { } - -static inline bool sb_is_blkdev_sb(struct super_block *sb) -{ - return false; -} #endif void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); -- cgit From 3f1266f1f82d7b8c72472a8921e80aa3e611fb62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:41 +0200 Subject: block: move block-related definitions out of fs.h Move most of the block related definition out of fs.h into more suitable headers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 92 ------------------------------------------------------ 1 file changed, 92 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f3ae38335d4..add30c3bdf9a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2563,79 +2563,10 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } -#ifdef CONFIG_BLOCK -extern int register_blkdev(unsigned int, const char *); -extern void unregister_blkdev(unsigned int, const char *); -extern struct block_device *bdget(dev_t); -extern struct block_device *bdgrab(struct block_device *bdev); -extern void bd_set_size(struct block_device *, loff_t size); -extern void bd_forget(struct inode *inode); -extern void bdput(struct block_device *); -extern void invalidate_bdev(struct block_device *); -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); -extern int sync_blockdev(struct block_device *bdev); -extern struct super_block *freeze_bdev(struct block_device *); -extern void emergency_thaw_bdev(struct super_block *sb); -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); -extern int fsync_bdev(struct block_device *); -#else -static inline void bd_forget(struct inode *inode) {} -static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} - -static inline int emergency_thaw_bdev(struct super_block *sb) -{ - return 0; -} - -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) -{ -} -#endif void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -#ifdef CONFIG_BLOCK -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, - void *holder); -extern struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder); -extern void bd_abort_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void blkdev_put(struct block_device *bdev, fmode_t mode); - -#ifdef CONFIG_SYSFS -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -#endif -#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 @@ -2666,31 +2597,12 @@ static inline void unregister_chrdev(unsigned int major, const char *name) __unregister_chrdev(major, 0, 256, name); } -/* fs/block_dev.c */ -#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ -#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ - -#ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_MAX 512 -extern const char *bdevname(struct block_device *bdev, char *buffer); -extern struct block_device *lookup_bdev(const char *); -extern void blkdev_show(struct seq_file *,off_t); - -#else -#define BLKDEV_MAJOR_MAX 0 -#endif - extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -#ifdef CONFIG_BLOCK -extern int revalidate_disk(struct gendisk *); -extern int check_disk_change(struct block_device *); -extern int __invalidate_device(struct block_device *, bool); -#endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -3090,10 +3002,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); -#ifdef CONFIG_BLOCK -extern int bdev_read_only(struct block_device *); -#endif -extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); -- cgit From 621c1f42945e76015c3a585e7a9fe6e71665eba0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:44 +0200 Subject: block: move struct block_device to blk_types.h Move the struct block_device definition together with most of the block layer definitions, as it has nothing to do with the rest of fs.h. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index add30c3bdf9a..1d7c4f7465d2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -470,45 +470,6 @@ struct address_space { * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ -struct request_queue; - -struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ - int bd_openers; - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; - struct mutex bd_mutex; /* open/close mutex */ - void * bd_claiming; - void * bd_holder; - int bd_holders; - bool bd_write_holder; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_disks; -#endif - struct block_device * bd_contains; - unsigned bd_block_size; - u8 bd_partno; - struct hd_struct * bd_part; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - int bd_invalidated; - struct gendisk * bd_disk; - struct request_queue * bd_queue; - struct backing_dev_info *bd_bdi; - struct list_head bd_list; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; - - /* The counter of freeze processes */ - int bd_fsfreeze_count; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; -} __randomize_layout; /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ #define PAGECACHE_TAG_DIRTY XA_MARK_0 @@ -907,8 +868,6 @@ static inline unsigned imajor(const struct inode *inode) return MAJOR(inode->i_rdev); } -extern struct block_device *I_BDEV(struct inode *inode); - struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ -- cgit From 21b9cb34385d93d661a9134adf19eae2bd734218 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 23 Jun 2020 09:09:03 +0200 Subject: fs: fs.h: fix a kernel-doc parameter description Changeset 3b0311e7ca71 ("vfs: track per-sb writeback errors and report them to syncfs") added a variant of filemap_sample_wb_err(), but it forgot to rename the arguments at the kernel-doc markup. Fix it. Fix those warnings: ./include/linux/fs.h:2845: warning: Function parameter or member 'file' not described in 'file_sample_sb_err' ./include/linux/fs.h:2845: warning: Excess function parameter 'mapping' description in 'file_sample_sb_err' Fixes: 3b0311e7ca71 ("vfs: track per-sb writeback errors and report them to syncfs") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/7b33bbceb29ac80874622a2bc84127bb10103245.1592895969.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..7e17ecc461d5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2829,7 +2829,7 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) /** * file_sample_sb_err - sample the current errseq_t to test for later errors - * @mapping: mapping to be sampled + * @file: file pointer to be sampled * * Grab the most current superblock-level errseq_t value for the given * struct file. -- cgit From 41da51bce36f44eefc1e3d0f47d18841cbd065ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 21 Nov 2019 23:25:07 +0000 Subject: fs: Add IOCB_NOIO flag for generic_file_read_iter Add an IOCB_NOIO flag that indicates to generic_file_read_iter that it shouldn't trigger any filesystem I/O for the actual request or for readahead. This allows to do tentative reads out of the page cache as some filesystems allow, and to take the appropriate locks and retry the reads only if the requested pages are not cached. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..4b7cb76e5837 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; -- cgit From 61a707c543e2afe3aa7e88f87267c5dafa4b5afa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:54:16 +0200 Subject: fs: add a __kernel_read helper This is the counterpart to __kernel_write, and skip the rw_verify_area call compared to kernel_read. Signed-off-by: Christoph Hellwig --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..22cbe7b2e919 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3033,6 +3033,7 @@ extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, lo extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit From 775802c0571fb438cd4f6548a323f9e4cb89f5aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 11:17:46 +0200 Subject: fs: remove __vfs_read Fold it into the two callers. Signed-off-by: Christoph Hellwig --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 22cbe7b2e919..0c0ec76b600b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1917,7 +1917,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit From 457e7a135cbf0a0b5ed2717c192c0c57112c3b32 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 2 Jul 2020 01:56:04 +0000 Subject: fs: introduce SB_INLINECRYPT Introduce SB_INLINECRYPT, which is set by filesystems that wish to use blk-crypto for file content en/decryption. This flag maps to the '-o inlinecrypt' mount option which multiple filesystems will implement, and code in fs/crypto/ needs to be able to check for this mount option in a filesystem-independent way. Signed-off-by: Satya Tangirala Reviewed-by: Jaegeuk Kim Reviewed-by: Eric Biggers Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20200702015607.1215430-2-satyat@google.com Signed-off-by: Eric Biggers --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..b5e07fcdd11d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1380,6 +1380,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NODIRATIME 2048 /* Do not update directory access times */ #define SB_SILENT 32768 #define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define SB_I_VERSION (1<<23) /* Update inode I_version field */ #define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ -- cgit From c04011fe8cbd80af1be6e12b53193bf3846750d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:47:43 +0200 Subject: fs: add a vfs_fchown helper Add a helper for struct file based chown operations. To be used by the initramfs code soon. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index f5abba86107d..0ddd64ca0b45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,6 +1744,8 @@ int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); +int vfs_fchown(struct file *file, uid_t user, gid_t group); + extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT -- cgit From 9e96c8c0e94eea2f69a9705f5d0f51928ea26c17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jul 2020 08:55:05 +0200 Subject: fs: add a vfs_fchmod helper Add a helper for struct file based chmode operations. To be used by the initramfs code soon. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ddd64ca0b45..635086726f20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1745,6 +1745,7 @@ int vfs_mkobj(struct dentry *, umode_t, void *); int vfs_fchown(struct file *file, uid_t user, gid_t group); +int vfs_fchmod(struct file *file, umode_t mode); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -- cgit From 6414e9b09ffd197803f8e86ce2fafdaf1de4e8e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 12 Jul 2020 20:09:52 -0700 Subject: fs: define inode flags using bit numbers Define the VFS inode flags using bit numbers instead of hardcoding powers of 2, which has become unwieldy now that we're up to 65536. No change in the actual values. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/fs.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..9bf7a32f2932 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1982,27 +1982,27 @@ struct super_operations { /* * Inode flags - they have no relation to superblock flags now */ -#define S_SYNC 1 /* Writes are synced at once */ -#define S_NOATIME 2 /* Do not update access times */ -#define S_APPEND 4 /* Append-only file */ -#define S_IMMUTABLE 8 /* Immutable file */ -#define S_DEAD 16 /* removed, but still open directory */ -#define S_NOQUOTA 32 /* Inode is not counted to quota */ -#define S_DIRSYNC 64 /* Directory modifications are synchronous */ -#define S_NOCMTIME 128 /* Do not update file c/mtime */ -#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ -#define S_PRIVATE 512 /* Inode is fs-internal */ -#define S_IMA 1024 /* Inode has an associated IMA struct */ -#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ -#define S_NOSEC 4096 /* no suid or xattr security attributes */ +#define S_SYNC (1 << 0) /* Writes are synced at once */ +#define S_NOATIME (1 << 1) /* Do not update access times */ +#define S_APPEND (1 << 2) /* Append-only file */ +#define S_IMMUTABLE (1 << 3) /* Immutable file */ +#define S_DEAD (1 << 4) /* removed, but still open directory */ +#define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ +#define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ +#define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ +#define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */ +#define S_PRIVATE (1 << 9) /* Inode is fs-internal */ +#define S_IMA (1 << 10) /* Inode has an associated IMA struct */ +#define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ +#define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX -#define S_DAX 8192 /* Direct Access, avoiding the page cache */ +#define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */ #else -#define S_DAX 0 /* Make all the DAX code disappear */ +#define S_DAX 0 /* Make all the DAX code disappear */ #endif -#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ -#define S_CASEFOLD 32768 /* Casefolded file */ -#define S_VERITY 65536 /* Verity file (using fs/verity/) */ +#define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ +#define S_CASEFOLD (1 << 15) /* Casefolded file */ +#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system -- cgit From fd5ad30c782351ab4d4a15941fc61e743a1bd66c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Jul 2020 08:19:55 +0200 Subject: fs: expose utimes_common Rename utimes_common to vfs_utimes and make it available outside of utimes.c. This will be used by the initramfs unpacking code. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 635086726f20..a1d2685a4878 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1746,6 +1746,7 @@ int vfs_mkobj(struct dentry *, umode_t, int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); +int vfs_utimes(const struct path *path, struct timespec64 *times); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -- cgit From 1752f0adea98ef859978c090e0726844348758f9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 1 Aug 2020 13:36:33 +0300 Subject: fs: optimise kiocb_set_rw_flags() Use a local var to collect flags in kiocb_set_rw_flags(). That spares some memory writes and allows to replace most of the jumps with MOVEcc. Signed-off-by: Pavel Begunkov Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- include/linux/fs.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4090320360f4..e535543d31d9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3446,22 +3446,28 @@ static inline int iocb_flags(struct file *file) static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { + int kiocb_flags = 0; + + if (!flags) + return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - ki->ki_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT; } if (flags & RWF_HIPRI) - ki->ki_flags |= IOCB_HIPRI; + kiocb_flags |= IOCB_HIPRI; if (flags & RWF_DSYNC) - ki->ki_flags |= IOCB_DSYNC; + kiocb_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) - ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); if (flags & RWF_APPEND) - ki->ki_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_APPEND; + + ki->ki_flags |= kiocb_flags; return 0; } -- cgit From c9dff08485a6c11449307abde1d7bb404fcee481 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Jul 2020 14:49:30 +0200 Subject: fs: Fix typo in comment The comment for function filemap_check_wb_err accidentally refers to it as filemap_check_wb_error. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index f5abba86107d..f9ae45795c1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2798,7 +2798,7 @@ static inline void filemap_set_wb_err(struct address_space *mapping, int err) } /** - * filemap_check_wb_error - has an error occurred since the mark was sampled? + * filemap_check_wb_err - has an error occurred since the mark was sampled? * @mapping: mapping to check for writeback errors * @since: previously-sampled errseq_t * -- cgit From e809d5f0b5c912fe981dce738f3283b2010665f0 Mon Sep 17 00:00:00 2001 From: Chris Down Date: Thu, 6 Aug 2020 23:20:20 -0700 Subject: tmpfs: per-superblock i_ino support Patch series "tmpfs: inode: Reduce risk of inum overflow", v7. In Facebook production we are seeing heavy i_ino wraparounds on tmpfs. On affected tiers, in excess of 10% of hosts show multiple files with different content and the same inode number, with some servers even having as many as 150 duplicated inode numbers with differing file content. This causes actual, tangible problems in production. For example, we have complaints from those working on remote caches that their application is reporting cache corruptions because it uses (device, inodenum) to establish the identity of a particular cache object, but because it's not unique any more, the application refuses to continue and reports cache corruption. Even worse, sometimes applications may not even detect the corruption but may continue anyway, causing phantom and hard to debug behaviour. In general, userspace applications expect that (device, inodenum) should be enough to be uniquely point to one inode, which seems fair enough. One might also need to check the generation, but in this case: 1. That's not currently exposed to userspace (ioctl(...FS_IOC_GETVERSION...) returns ENOTTY on tmpfs); 2. Even with generation, there shouldn't be two live inodes with the same inode number on one device. In order to mitigate this, we take a two-pronged approach: 1. Moving inum generation from being global to per-sb for tmpfs. This itself allows some reduction in i_ino churn. This works on both 64- and 32- bit machines. 2. Adding inode{64,32} for tmpfs. This fix is supported on machines with 64-bit ino_t only: we allow users to mount tmpfs with a new inode64 option that uses the full width of ino_t, or CONFIG_TMPFS_INODE64. You can see how this compares to previous related patches which didn't implement this per-superblock: - https://patchwork.kernel.org/patch/11254001/ - https://patchwork.kernel.org/patch/11023915/ This patch (of 2): get_next_ino has a number of problems: - It uses and returns a uint, which is susceptible to become overflowed if a lot of volatile inodes that use get_next_ino are created. - It's global, with no specificity per-sb or even per-filesystem. This means it's not that difficult to cause inode number wraparounds on a single device, which can result in having multiple distinct inodes with the same inode number. This patch adds a per-superblock counter that mitigates the second case. This design also allows us to later have a specific i_ino size per-device, for example, allowing users to choose whether to use 32- or 64-bit inodes for each tmpfs mount. This is implemented in the next commit. For internal shmem mounts which may be less tolerant to spinlock delays, we implement a percpu batching scheme which only takes the stat_lock at each batch boundary. Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Hugh Dickins Cc: Amir Goldstein Cc: Al Viro Cc: Matthew Wilcox Cc: Jeff Layton Cc: Johannes Weiner Cc: Tejun Heo Link: http://lkml.kernel.org/r/cover.1594661218.git.chris@chrisdown.name Link: http://lkml.kernel.org/r/1986b9d63b986f08ec07a4aa4b2275e718e47d8a.1594661218.git.chris@chrisdown.name Signed-off-by: Linus Torvalds --- include/linux/fs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 488c3ef93601..b1c3a14f12e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2946,6 +2946,21 @@ extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); +/* + * Userspace may rely on the the inode number being non-zero. For example, glibc + * simply ignores files with zero i_ino in unlink() and other places. + * + * As an additional complication, if userspace was compiled with + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the + * lower 32 bits, so we need to check that those aren't zero explicitly. With + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but + * better safe than sorry. + */ +static inline bool is_zero_ino(ino_t ino) +{ + return (u32)ino == 0; +} + extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); -- cgit From 45e55300f11495ed58c53427da7f0d958800a30f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 6 Aug 2020 23:23:37 -0700 Subject: mm: remove unnecessary wrapper function do_mmap_pgoff() The current split between do_mmap() and do_mmap_pgoff() was introduced in commit 1fcfd8db7f82 ("mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()") to support MPX. The wrapper function do_mmap_pgoff() always passed 0 as the value of the vm_flags argument to do_mmap(). However, MPX support has subsequently been removed from the kernel and there were no more direct callers of do_mmap(); all calls were going via do_mmap_pgoff(). Simplify the code by removing do_mmap_pgoff() and changing all callers to directly call do_mmap(), which now no longer takes a vm_flags argument. Signed-off-by: Peter Collingbourne Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: David Hildenbrand Link: http://lkml.kernel.org/r/20200727194109.1371462-1-pcc@google.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index b1c3a14f12e8..f5da33bcaaae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -528,7 +528,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * -- cgit From efa8480a831673bb52400df9dbe5da0aacda97bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 18:44:24 -0600 Subject: fs: RWF_NOWAIT should imply IOCB_NOIO With the change allowing read-ahead for IOCB_NOWAIT, we changed the RWF_NOWAIT semantics of only doing cached reads. Since we know have IOCB_NOIO to manage that specific side of it, just make RWF_NOWAIT imply IOCB_NOIO as well to restore the previous behavior. Fixes: 2e85abf053b9 ("mm: allow read-ahead with IOCB_NOWAIT set") Reported-by: Dave Chinner Reviewed-by: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index bd7ec3eaeed0..f1cca4bfdd7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3293,7 +3293,7 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; } if (flags & RWF_HIPRI) kiocb_flags |= IOCB_HIPRI; -- cgit From 34ae204f18519f0920bd50a644abd6fefc8dbfcf Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Aug 2020 18:31:38 -0700 Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem in at least read mode. This is because the explicit locking in huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring the code, the call to huge_pte_alloc in the else block at the beginning of hugetlb_fault was missed. Unfortunately, that else clause is exercised when there is no page table entry. This will likely lead to a call to huge_pmd_share. If huge_pmd_share thinks pmd sharing is possible, it will traverse the mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is modifying the tree, bad things such as addressing exceptions or worse could happen. Simply remove the else clause. It should have been removed previously. The code following the else will call huge_pte_alloc with the appropriate locking. To prevent this type of issue in the future, add routines to assert that i_mmap_rwsem is held, and call these routines in huge pmd sharing routines. Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Suggested-by: Matthew Wilcox Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Andrea Arcangeli Cc: "Kirill A.Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 011af396aa17..7c69dd7c6160 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) up_read(&mapping->i_mmap_rwsem); } +static inline void i_mmap_assert_locked(struct address_space *mapping) +{ + lockdep_assert_held(&mapping->i_mmap_rwsem); +} + +static inline void i_mmap_assert_write_locked(struct address_space *mapping) +{ + lockdep_assert_held_write(&mapping->i_mmap_rwsem); +} + /* * Might pages of this file be mapped into userspace? */ -- cgit