Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--   fs/ext4/inode.c   165
1 file changed, 77 insertions(+), 88 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 78ea864fa8cd..0c466ccbed69 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -549,10 +549,13 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
retval = ext4_ext_map_blocks(handle, inode, map, flags);
else
retval = ext4_ind_map_blocks(handle, inode, map, flags);
-
- if (retval <= 0)
+ if (retval < 0)
return retval;
+ /* A hole? */
+ if (retval == 0)
+ goto out;
+
if (unlikely(retval != map->m_len)) {
ext4_warning(inode->i_sb,
"ES len assertion failed for inode "
@@ -572,11 +575,13 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status, false);
- return retval;
+ } else {
+ retval = ext4_map_query_blocks_next_in_leaf(handle, inode, map,
+ orig_mlen);
}
-
- return ext4_map_query_blocks_next_in_leaf(handle, inode, map,
- orig_mlen);
+out:
+ map->m_seq = READ_ONCE(EXT4_I(inode)->i_es_seq);
+ return retval;
}
static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
@@ -647,8 +652,8 @@ static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
* If the extent has been zeroed out, we don't need to update
* extent status tree.
*/
- if (flags & EXT4_GET_BLOCKS_PRE_IO &&
- ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, &map->m_seq)) {
if (ext4_es_is_written(&es))
return retval;
}
@@ -657,6 +662,7 @@ static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk,
status, flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+ map->m_seq = READ_ONCE(EXT4_I(inode)->i_es_seq);
return retval;
}
@@ -722,7 +728,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
ext4_check_map_extents_env(inode);
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, &map->m_seq)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -809,7 +815,13 @@ found:
down_write(&EXT4_I(inode)->i_data_sem);
retval = ext4_map_create_blocks(handle, inode, map, flags);
up_write((&EXT4_I(inode)->i_data_sem));
- if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+
+ if (retval < 0)
+ ext_debug(inode, "failed with err %d\n", retval);
+ if (retval <= 0)
+ return retval;
+
+ if (map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
@@ -824,9 +836,8 @@ found:
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
- loff_t start_byte =
- (loff_t)map->m_lblk << inode->i_blkbits;
- loff_t length = (loff_t)map->m_len << inode->i_blkbits;
+ loff_t start_byte = EXT4_LBLK_TO_B(inode, map->m_lblk);
+ loff_t length = EXT4_LBLK_TO_B(inode, map->m_len);
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
ret = ext4_jbd2_inode_add_wait(handle, inode,
@@ -838,12 +849,8 @@ found:
return ret;
}
}
- if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
- map->m_flags & EXT4_MAP_MAPPED))
- ext4_fc_track_range(handle, inode, map->m_lblk,
- map->m_lblk + map->m_len - 1);
- if (retval < 0)
- ext_debug(inode, "failed with err %d\n", retval);
+ ext4_fc_track_range(handle, inode, map->m_lblk, map->m_lblk +
+ map->m_len - 1);
return retval;
}
@@ -1162,8 +1169,7 @@ int ext4_block_write_begin(handle_t *handle, struct folio *folio,
unsigned block_start, block_end;
sector_t block;
int err = 0;
- unsigned blocksize = inode->i_sb->s_blocksize;
- unsigned bbits;
+ unsigned int blocksize = i_blocksize(inode);
struct buffer_head *bh, *head, *wait[2];
int nr_wait = 0;
int i;
@@ -1172,12 +1178,12 @@ int ext4_block_write_begin(handle_t *handle, struct folio *folio,
BUG_ON(!folio_test_locked(folio));
BUG_ON(to > folio_size(folio));
BUG_ON(from > to);
+ WARN_ON_ONCE(blocksize > folio_size(folio));
head = folio_buffers(folio);
if (!head)
head = create_empty_buffers(folio, blocksize, 0);
- bbits = ilog2(blocksize);
- block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
+ block = EXT4_PG_TO_LBLK(inode, folio->index);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
@@ -1907,7 +1913,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
ext4_check_map_extents_env(inode);
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) {
map->m_len = min_t(unsigned int, map->m_len,
es.es_len - (map->m_lblk - es.es_lblk));
@@ -1960,7 +1966,7 @@ add_delayed:
* is held in write mode, before inserting a new da entry in
* the extent status tree.
*/
- if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, NULL)) {
map->m_len = min_t(unsigned int, map->m_len,
es.es_len - (map->m_lblk - es.es_lblk));
@@ -1978,6 +1984,8 @@ add_delayed:
map->m_flags |= EXT4_MAP_DELAYED;
retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
+ if (!retval)
+ map->m_seq = READ_ONCE(EXT4_I(inode)->i_es_seq);
up_write(&EXT4_I(inode)->i_data_sem);
return retval;
@@ -2224,7 +2232,6 @@ static int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio,
ext4_lblk_t lblk = *m_lblk;
ext4_fsblk_t pblock = *m_pblk;
int err = 0;
- int blkbits = mpd->inode->i_blkbits;
ssize_t io_end_size = 0;
struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
@@ -2250,7 +2257,8 @@ static int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio,
err = PTR_ERR(io_end_vec);
goto out;
}
- io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits;
+ io_end_vec->offset = EXT4_LBLK_TO_B(mpd->inode,
+ mpd->map.m_lblk);
}
*map_bh = true;
goto out;
@@ -2260,7 +2268,7 @@ static int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio,
bh->b_blocknr = pblock++;
}
clear_buffer_unwritten(bh);
- io_end_size += (1 << blkbits);
+ io_end_size += i_blocksize(mpd->inode);
} while (lblk++, (bh = bh->b_this_page) != head);
io_end_vec->size += io_end_size;
@@ -2290,15 +2298,14 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
struct folio_batch fbatch;
unsigned nr, i;
struct inode *inode = mpd->inode;
- int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
pgoff_t start, end;
ext4_lblk_t lblk;
ext4_fsblk_t pblock;
int err;
bool map_bh = false;
- start = mpd->map.m_lblk >> bpp_bits;
- end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
+ start = EXT4_LBLK_TO_PG(inode, mpd->map.m_lblk);
+ end = EXT4_LBLK_TO_PG(inode, mpd->map.m_lblk + mpd->map.m_len - 1);
pblock = mpd->map.m_pblk;
folio_batch_init(&fbatch);
@@ -2309,7 +2316,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
for (i = 0; i < nr; i++) {
struct folio *folio = fbatch.folios[i];
- lblk = folio->index << bpp_bits;
+ lblk = EXT4_PG_TO_LBLK(inode, folio->index);
err = mpage_process_folio(mpd, folio, &lblk, &pblock,
&map_bh);
/*
@@ -2462,7 +2469,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
io_end_vec = ext4_alloc_io_end_vec(io_end);
if (IS_ERR(io_end_vec))
return PTR_ERR(io_end_vec);
- io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
+ io_end_vec->offset = EXT4_LBLK_TO_B(inode, map->m_lblk);
do {
err = mpage_map_one_extent(handle, mpd);
if (err < 0) {
@@ -2612,7 +2619,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
pgoff_t end = mpd->end_pos >> PAGE_SHIFT;
xa_mark_t tag;
int i, err = 0;
- int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
struct buffer_head *head;
handle_t *handle = NULL;
@@ -2648,7 +2654,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
*/
if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
mpd->wbc->nr_to_write <=
- mpd->map.m_len >> (PAGE_SHIFT - blkbits))
+ EXT4_LBLK_TO_PG(mpd->inode, mpd->map.m_len))
goto out;
/* If we can't merge this page, we are done. */
@@ -2726,8 +2732,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
mpage_folio_done(mpd, folio);
} else {
/* Add all dirty buffers to mpd */
- lblk = ((ext4_lblk_t)folio->index) <<
- (PAGE_SHIFT - blkbits);
+ lblk = EXT4_PG_TO_LBLK(mpd->inode, folio->index);
head = folio_buffers(folio);
err = mpage_process_page_bufs(mpd, head, head,
lblk);
@@ -3499,8 +3504,8 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
else
iomap->bdev = inode->i_sb->s_bdev;
- iomap->offset = (u64) map->m_lblk << blkbits;
- iomap->length = (u64) map->m_len << blkbits;
+ iomap->offset = EXT4_LBLK_TO_B(inode, map->m_lblk);
+ iomap->length = EXT4_LBLK_TO_B(inode, map->m_len);
if ((map->m_flags & EXT4_MAP_MAPPED) &&
!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -3540,7 +3545,7 @@ static int ext4_map_blocks_atomic_write_slow(handle_t *handle,
ext4_lblk_t m_lblk = map->m_lblk;
unsigned int m_len = map->m_len;
unsigned int mapped_len = 0, m_flags = 0;
- ext4_fsblk_t next_pblk;
+ ext4_fsblk_t next_pblk = 0;
bool check_next_pblk = false;
int ret = 0;
@@ -3674,7 +3679,6 @@ static int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map,
unsigned int flags)
{
handle_t *handle;
- u8 blkbits = inode->i_blkbits;
int ret, dio_credits, m_flags = 0, retries = 0;
bool force_commit = false;
@@ -3733,7 +3737,7 @@ retry:
* i_disksize out to i_size. This could be beyond where direct I/O is
* happening and thus expose allocated blocks to direct I/O reads.
*/
- else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode))
+ else if (EXT4_LBLK_TO_B(inode, map->m_lblk) >= i_size_read(inode))
m_flags = EXT4_GET_BLOCKS_CREATE;
else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;
@@ -4068,7 +4072,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
blocksize = inode->i_sb->s_blocksize;
- iblock = folio->index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
+ iblock = EXT4_PG_TO_LBLK(inode, folio->index);
bh = folio_buffers(folio);
if (!bh)
@@ -4153,9 +4157,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
struct inode *inode = mapping->host;
- unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize = inode->i_sb->s_blocksize;
- unsigned max = blocksize - (offset & (blocksize - 1));
+ unsigned int max = blocksize - (from & (blocksize - 1));
/*
* correct length if it does not fall between
@@ -4180,7 +4183,6 @@ static int ext4_block_zero_page_range(handle_t *handle,
static int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from)
{
- unsigned offset = from & (PAGE_SIZE-1);
unsigned length;
unsigned blocksize;
struct inode *inode = mapping->host;
@@ -4189,8 +4191,8 @@ static int ext4_block_truncate_page(handle_t *handle,
if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
return 0;
- blocksize = inode->i_sb->s_blocksize;
- length = blocksize - (offset & (blocksize - 1));
+ blocksize = i_blocksize(inode);
+ length = blocksize - (from & (blocksize - 1));
return ext4_block_zero_page_range(handle, mapping, from, length);
}
@@ -4396,10 +4398,10 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
/*
* If the hole extends beyond i_size, set the hole to end after
- * the page that contains i_size.
+ * the block that contains i_size to save pointless tail block zeroing.
*/
- if (end > inode->i_size)
- end = round_up(inode->i_size, PAGE_SIZE);
+ if (end >= inode->i_size)
+ end = round_up(inode->i_size, sb->s_blocksize);
if (end > max_end)
end = max_end;
length = end - offset;
@@ -5142,37 +5144,23 @@ error:
return -EFSCORRUPTED;
}
-static bool ext4_should_enable_large_folio(struct inode *inode)
+void ext4_set_inode_mapping_order(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
+ u16 min_order, max_order;
- if (!S_ISREG(inode->i_mode))
- return false;
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
- ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return false;
- if (ext4_has_feature_verity(sb))
- return false;
- if (ext4_has_feature_encrypt(sb))
- return false;
-
- return true;
-}
+ max_order = EXT4_SB(sb)->s_max_folio_order;
+ if (!max_order)
+ return;
-/*
- * Limit the maximum folio order to 2048 blocks to prevent overestimation
- * of reserve handle credits during the folio writeback in environments
- * where the PAGE_SIZE exceeds 4KB.
- */
-#define EXT4_MAX_PAGECACHE_ORDER(i) \
- umin(MAX_PAGECACHE_ORDER, (11 + (i)->i_blkbits - PAGE_SHIFT))
-void ext4_set_inode_mapping_order(struct inode *inode)
-{
- if (!ext4_should_enable_large_folio(inode))
+ min_order = EXT4_SB(sb)->s_min_folio_order;
+ if (!min_order && !S_ISREG(inode->i_mode))
return;
- mapping_set_folio_order_range(inode->i_mapping, 0,
- EXT4_MAX_PAGECACHE_ORDER(inode));
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
+ max_order = min_order;
+
+ mapping_set_folio_order_range(inode->i_mapping, min_order, max_order);
}
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -5284,7 +5272,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
ei->i_inline_off = 0;
ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
@@ -5517,7 +5504,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (ret)
goto bad_inode;
brelse(iloc.bh);
-
+ /* Initialize the "no ACL's" state for the simple cases */
+ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) && !ei->i_file_acl)
+ cache_no_acl(inode);
unlock_new_inode(inode);
return inode;
@@ -6548,14 +6537,14 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
* dirty data which can be converted only after flushing the dirty
* data (and journalled aops don't know how to handle these cases).
*/
- if (val) {
- filemap_invalidate_lock(inode->i_mapping);
- err = filemap_write_and_wait(inode->i_mapping);
- if (err < 0) {
- filemap_invalidate_unlock(inode->i_mapping);
- return err;
- }
+ filemap_invalidate_lock(inode->i_mapping);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err < 0) {
+ filemap_invalidate_unlock(inode->i_mapping);
+ return err;
}
+ /* Before switching the inode journalling mode, evict all the page cache. */
+ truncate_pagecache(inode, 0);
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
jbd2_journal_lock_updates(journal);
@@ -6575,17 +6564,17 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (err < 0) {
jbd2_journal_unlock_updates(journal);
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
+ filemap_invalidate_unlock(inode->i_mapping);
return err;
}
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
+ ext4_set_inode_mapping_order(inode);
jbd2_journal_unlock_updates(journal);
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
-
- if (val)
- filemap_invalidate_unlock(inode->i_mapping);
+ filemap_invalidate_unlock(inode->i_mapping);
/* Finally we can mark the inode as dirty. */