diff options
Diffstat (limited to 'fs/ext4/ext4.h')
-rw-r--r-- | fs/ext4/ext4.h | 183 |
1 files changed, 123 insertions, 60 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4e7de7eaa374..18373de980f2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -256,9 +256,19 @@ struct ext4_allocation_request { #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) #define EXT4_MAP_DELAYED BIT(BH_Delay) +/* + * This is for use in ext4_map_query_blocks() for a special case where we can + * have a physically and logically contiguous blocks split across two leaf + * nodes instead of a single extent. This is required in case of atomic writes + * to know whether the returned extent is last in leaf. If yes, then lookup for + * next in leaf block in ext4_map_query_blocks_next_in_leaf(). + * - This is never going to be added to any buffer head state. + * - We use the next available bit after BH_BITMAP_UPTODATE. + */ +#define EXT4_MAP_QUERY_LAST_IN_LEAF BIT(BH_BITMAP_UPTODATE + 1) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ - EXT4_MAP_DELAYED) + EXT4_MAP_DELAYED | EXT4_MAP_QUERY_LAST_IN_LEAF) struct ext4_map_blocks { ext4_fsblk_t m_pblk; @@ -278,7 +288,10 @@ struct ext4_system_blocks { /* * Flags for ext4_io_end->flags */ -#define EXT4_IO_END_UNWRITTEN 0x0001 +#define EXT4_IO_END_UNWRITTEN 0x0001 +#define EXT4_IO_END_FAILED 0x0002 + +#define EXT4_IO_END_DEFER_COMPLETION (EXT4_IO_END_UNWRITTEN | EXT4_IO_END_FAILED) struct ext4_io_end_vec { struct list_head list; /* list of io_end_vec */ @@ -367,6 +380,8 @@ struct ext4_io_submit { #define EXT4_MAX_BLOCKS(size, offset, blkbits) \ ((EXT4_BLOCK_ALIGN(size + offset, blkbits) >> blkbits) - (offset >> \ blkbits)) +#define EXT4_B_TO_LBLK(inode, offset) \ + (round_up((offset), i_blocksize(inode)) >> (inode)->i_blkbits) /* Translate a block number to a cluster number */ #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) @@ -701,9 +716,6 @@ enum { #define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) - /* Convert extent to initialized after IO complete */ -#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) /* Eventual metadata allocation (due to growing extent tree) * should not fail, so try to use reserved blocks for that.*/ #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 @@ -715,11 +727,23 @@ enum { #define EXT4_GET_BLOCKS_ZERO 0x0200 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ EXT4_GET_BLOCKS_ZERO) - /* Caller will submit data before dropping transaction handle. This - * allows jbd2 to avoid submitting data before commit. */ + /* Caller is in the context of data submission, such as writeback, + * fsync, etc. Especially, in the generic writeback path, caller will + * submit data before dropping transaction handle. This allows jbd2 + * to avoid submitting data before commit. */ #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 + /* Convert extent to initialized after IO complete */ +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT |\ + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |\ + EXT4_GET_BLOCKS_IO_SUBMIT) /* Caller is in the atomic contex, find extent if it has been cached */ #define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800 +/* + * Atomic write caller needs this to query in the slow path of mixed mapping + * case, when a contiguous extent can be split across two adjacent leaf nodes. + * Look EXT4_MAP_QUERY_LAST_IN_LEAF. + */ +#define EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF 0x1000 /* * The bit position of these flags must not overlap with any of the @@ -733,6 +757,13 @@ enum { #define EXT4_EX_NOCACHE 0x40000000 #define EXT4_EX_FORCE_CACHE 0x20000000 #define EXT4_EX_NOFAIL 0x10000000 +/* + * ext4_map_query_blocks() uses this filter mask to filter the flags needed to + * pass while lookup/querying of on disk extent tree. + */ +#define EXT4_EX_QUERY_FILTER (EXT4_EX_NOCACHE | EXT4_EX_FORCE_CACHE |\ + EXT4_EX_NOFAIL |\ + EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF) /* * Flags used by ext4_free_blocks @@ -1056,15 +1087,16 @@ struct ext4_inode_info { /* End of lblk range that needs to be committed in this fast commit */ ext4_lblk_t i_fc_lblk_len; - /* Number of ongoing updates on this inode */ - atomic_t i_fc_updates; - atomic_t i_unwritten; /* Nr. of inflight conversions pending */ + spinlock_t i_raw_lock; /* protects updates to the raw inode */ /* Fast commit wait queue for this inode */ wait_queue_head_t i_fc_wait; - /* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */ - struct mutex i_fc_lock; + /* + * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len + * and inode's EXT4_FC_STATE_COMMITTING state bit. + */ + spinlock_t i_fc_lock; /* * i_disksize keeps track of what the inode size is ON DISK, not @@ -1097,8 +1129,6 @@ struct ext4_inode_info { struct inode vfs_inode; struct jbd2_inode *jinode; - spinlock_t i_raw_lock; /* protects updates to the raw inode */ - /* * File creation time. Its function is same as that of * struct timespec64 i_{a,c,m}time in the generic inode. @@ -1141,6 +1171,7 @@ struct ext4_inode_info { /* quota space reservation, managed internally by quota code */ qsize_t i_reserved_quota; #endif + spinlock_t i_block_reservation_lock; /* Lock protecting lists below */ spinlock_t i_completed_io_lock; @@ -1151,8 +1182,6 @@ struct ext4_inode_info { struct list_head i_rsv_conversion_list; struct work_struct i_rsv_conversion_work; - spinlock_t i_block_reservation_lock; - /* * Transactions that contain inode's metadata needed to complete * fsync and fdatasync, respectively. @@ -1606,6 +1635,8 @@ struct ext4_sb_info { unsigned int s_mb_prefetch; unsigned int s_mb_prefetch_limit; unsigned int s_mb_best_avail_max_trim_order; + unsigned int s_sb_update_sec; + unsigned int s_sb_update_kb; /* stats for buddy allocator */ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ @@ -1749,7 +1780,7 @@ struct ext4_sb_info { * following fields: * ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh. */ - spinlock_t s_fc_lock; + struct mutex s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats; tid_t s_fc_ineligible_tid; @@ -1821,7 +1852,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ enum { EXT4_MF_MNTDIR_SAMPLED, - EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ + EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ + EXT4_MF_JOURNAL_DESTROY /* Journal is in process of destroying */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) @@ -1907,6 +1939,7 @@ enum { EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ EXT4_STATE_FC_COMMITTING, /* Fast commit ongoing */ + EXT4_STATE_FC_FLUSHING_DATA, /* Fast commit flushing data */ EXT4_STATE_ORPHAN_FILE, /* Inode orphaned in orphan file */ }; @@ -2232,15 +2265,32 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly); /* * Superblock flags */ -#define EXT4_FLAGS_RESIZING 0 -#define EXT4_FLAGS_SHUTDOWN 1 -#define EXT4_FLAGS_BDEV_IS_DAX 2 +enum { + EXT4_FLAGS_RESIZING, /* Avoid superblock update and resize race */ + EXT4_FLAGS_SHUTDOWN, /* Prevent access to the file system */ + EXT4_FLAGS_BDEV_IS_DAX, /* Current block device support DAX */ + EXT4_FLAGS_EMERGENCY_RO,/* Emergency read-only due to fs errors */ +}; static inline int ext4_forced_shutdown(struct super_block *sb) { return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags); } +static inline int ext4_emergency_ro(struct super_block *sb) +{ + return test_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags); +} + +static inline int ext4_emergency_state(struct super_block *sb) +{ + if (unlikely(ext4_forced_shutdown(sb))) + return -EIO; + if (unlikely(ext4_emergency_ro(sb))) + return -EROFS; + return 0; +} + /* * Default values for user and/or group using reserved blocks */ @@ -2272,10 +2322,19 @@ static inline int ext4_forced_shutdown(struct super_block *sb) #define EXT4_DEFM_NODELALLOC 0x0800 /* - * Default journal batch times + * Default journal batch times and ioprio. */ #define EXT4_DEF_MIN_BATCH_TIME 0 #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ +#define EXT4_DEF_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) + + +/* + * Default values for superblock update + */ +#define EXT4_DEF_SB_UPDATE_INTERVAL_SEC (3600) /* seconds (1 hour) */ +#define EXT4_DEF_SB_UPDATE_INTERVAL_KB (16384) /* kilobytes (16MB) */ + /* * Minimum number of groups in a flexgroup before we separate out @@ -2457,8 +2516,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define DX_HASH_SIPHASH 6 #define DX_HASH_LAST DX_HASH_SIPHASH -static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, - const void *address, unsigned int length) +static inline u32 ext4_chksum(u32 crc, const void *address, unsigned int length) { return crc32c(crc, address, length); } @@ -2810,8 +2868,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, struct ext4_dir_entry_2 *dirent, struct fscrypt_str *ent_name); extern void ext4_htree_free_dir_info(struct dir_private_info *p); -extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, - struct buffer_head *bh, +extern int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh, void *buf, int buf_size, struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de); @@ -2893,8 +2950,6 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle); -void ext4_fc_start_update(struct inode *inode); -void ext4_fc_stop_update(struct inode *inode); void ext4_fc_del(struct inode *inode); bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block); void ext4_fc_replay_cleanup(struct super_block *sb); @@ -2944,6 +2999,7 @@ static inline bool ext4_mb_cr_expensive(enum criteria cr) void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, struct ext4_inode_info *ei); int ext4_inode_is_fast_symlink(struct inode *inode); +void ext4_check_map_extents_env(struct inode *inode); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, @@ -2964,6 +3020,7 @@ int ext4_walk_page_buffers(handle_t *handle, struct buffer_head *bh)); int do_journal_get_write_access(handle_t *handle, struct inode *inode, struct buffer_head *bh); +bool ext4_should_enable_large_folio(struct inode *inode); #define FALL_BACK_TO_NONDELALLOC 1 #define CONVERT_INLINE_DATA 2 @@ -3001,6 +3058,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); extern int ext4_break_layouts(struct inode *); +extern int ext4_truncate_page_cache_block_range(struct inode *inode, + loff_t start, loff_t end); extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); extern void ext4_set_inode_flags(struct inode *, bool init); extern int ext4_alloc_da_blocks(struct inode *inode); @@ -3008,6 +3067,8 @@ extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); +extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks, + int pextents); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf); @@ -3019,6 +3080,17 @@ extern void ext4_da_update_reserve_space(struct inode *inode, extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, ext4_lblk_t len); +static inline bool is_special_ino(struct super_block *sb, unsigned long ino) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + return (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) || + ino == le32_to_cpu(es->s_usr_quota_inum) || + ino == le32_to_cpu(es->s_grp_quota_inum) || + ino == le32_to_cpu(es->s_prj_quota_inum) || + ino == le32_to_cpu(es->s_orphan_file_inum); +} + /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); @@ -3088,8 +3160,7 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wa extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); -extern __le32 ext4_superblock_csum(struct super_block *sb, - struct ext4_super_block *es); +extern __le32 ext4_superblock_csum(struct ext4_super_block *es); extern void ext4_superblock_csum_set(struct super_block *sb); extern int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup); @@ -3259,14 +3330,10 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, extern int ext4_register_li_request(struct super_block *sb, ext4_group_t first_not_zeroed); -static inline int ext4_has_metadata_csum(struct super_block *sb) -{ - return ext4_has_feature_metadata_csum(sb); -} - static inline int ext4_has_group_desc_csum(struct super_block *sb) { - return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb); + return ext4_has_feature_gdt_csum(sb) || + ext4_has_feature_metadata_csum(sb); } #define ext4_read_incompat_64bit_val(es, name) \ @@ -3351,6 +3418,13 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) return 1 << sbi->s_log_groups_per_flex; } +static inline loff_t ext4_get_maxbytes(struct inode *inode) +{ + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + return inode->i_sb->s_maxbytes; + return EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; +} + #define ext4_std_error(sb, errno) \ do { \ if ((errno)) \ @@ -3546,11 +3620,11 @@ extern int ext4_try_to_write_inline_data(struct address_space *mapping, struct folio **foliop); int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct folio *folio); -extern int ext4_da_write_inline_data_begin(struct address_space *mapping, - struct inode *inode, - loff_t pos, unsigned len, - struct folio **foliop, - void **fsdata); +extern int ext4_generic_write_inline_data(struct address_space *mapping, + struct inode *inode, + loff_t pos, unsigned len, + struct folio **foliop, + void **fsdata, bool da); extern int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); @@ -3683,6 +3757,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, loff_t offset, ssize_t len); +extern int ext4_convert_unwritten_extents_atomic(handle_t *handle, + struct inode *inode, loff_t offset, ssize_t len); extern int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, @@ -3785,34 +3861,19 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } -/* For ioend & aio unwritten conversion wait queues */ -#define EXT4_WQ_HASH_SZ 37 -#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ - EXT4_WQ_HASH_SZ]) -extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; - extern int ext4_resize_begin(struct super_block *sb); extern int ext4_resize_end(struct super_block *sb, bool update_backups); -static inline void ext4_set_io_unwritten_flag(struct inode *inode, - struct ext4_io_end *io_end) +static inline void ext4_set_io_unwritten_flag(struct ext4_io_end *io_end) { - if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) io_end->flag |= EXT4_IO_END_UNWRITTEN; - atomic_inc(&EXT4_I(inode)->i_unwritten); - } } static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) { - struct inode *inode = io_end->inode; - - if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + if (io_end->flag & EXT4_IO_END_UNWRITTEN) io_end->flag &= ~EXT4_IO_END_UNWRITTEN; - /* Wake up anyone waiting on unwritten extent conversion */ - if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) - wake_up_all(ext4_ioend_wq(inode)); - } } extern const struct iomap_ops ext4_iomap_ops; @@ -3835,7 +3896,9 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh) static inline bool ext4_inode_can_atomic_write(struct inode *inode) { - return S_ISREG(inode->i_mode) && EXT4_SB(inode->i_sb)->s_awu_min > 0; + return S_ISREG(inode->i_mode) && + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + EXT4_SB(inode->i_sb)->s_awu_min > 0; } extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, |