| author | Thomas Zimmermann <tzimmermann@suse.de> | 2025-11-19 15:26:25 +0100 |
|---|---|---|
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2025-11-19 15:26:25 +0100 |
| commit | d4f14fa5c6c4d39b5187e1f410d59e38d76f283f (patch) | |
| tree | 8666bd4fcd78149936221a64707c4dc6ca75a9f8 /fs/xfs | |
| parent | 949f1fd2225baefbea2995afa807dba5cbdb6bd3 (diff) | |
| parent | 6a23ae0a96a600d1d12557add110e0bb6e32730c (diff) | |
Merge drm/drm-fixes into drm-misc-fixes
Backmerging to get fixes from v6.18-rc6.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/libxfs/xfs_rtgroup.h | 6 |
| -rw-r--r-- | fs/xfs/xfs_discard.c | 4 |
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 82 |
| -rw-r--r-- | fs/xfs/xfs_zone_alloc.c | 14 |
| -rw-r--r-- | fs/xfs/xfs_zone_gc.c | 27 |
5 files changed, 117 insertions, 16 deletions
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index d36a6ae0abe5..d4fcf591e63d 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
 		uint8_t *rtg_rsum_cache;
 		struct xfs_open_zone *rtg_open_zone;
 	};
+
+	/*
+	 * Count of outstanding GC operations for zoned XFS. Any RTG with a
+	 * non-zero rtg_gccount will not be picked as new GC victim.
+	 */
+	atomic_t rtg_gccount;
 };
 
 /*
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index ee49f20875af..6917de832191 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -726,8 +726,10 @@ xfs_trim_rtgroup_extents(
 			break;
 		}
 
-		if (!tr.queued)
+		if (!tr.queued) {
+			kfree(tr.extents);
 			break;
+		}
 
 		/*
 		 * We hand the extent list to the discard function here so the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d3f6e3e42a11..490e12cb99be 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1091,6 +1091,29 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
 };
 #endif /* CONFIG_XFS_RT */
 
+#ifdef DEBUG
+static void
+xfs_check_atomic_cow_conversion(
+	struct xfs_inode *ip,
+	xfs_fileoff_t offset_fsb,
+	xfs_filblks_t count_fsb,
+	const struct xfs_bmbt_irec *cmap)
+{
+	struct xfs_iext_cursor icur;
+	struct xfs_bmbt_irec cmap2 = { };
+
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap2))
+		xfs_trim_extent(&cmap2, offset_fsb, count_fsb);
+
+	ASSERT(cmap2.br_startoff == cmap->br_startoff);
+	ASSERT(cmap2.br_blockcount == cmap->br_blockcount);
+	ASSERT(cmap2.br_startblock == cmap->br_startblock);
+	ASSERT(cmap2.br_state == cmap->br_state);
+}
+#else
+# define xfs_check_atomic_cow_conversion(...) ((void)0)
+#endif
+
 static int
 xfs_atomic_write_cow_iomap_begin(
 	struct inode *inode,
@@ -1102,9 +1125,10 @@ xfs_atomic_write_cow_iomap_begin(
 {
 	struct xfs_inode *ip = XFS_I(inode);
 	struct xfs_mount *mp = ip->i_mount;
-	const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
-	xfs_filblks_t count_fsb = end_fsb - offset_fsb;
+	const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	const xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
+	const xfs_filblks_t count_fsb = end_fsb - offset_fsb;
+	xfs_filblks_t hole_count_fsb;
 	int nmaps = 1;
 	xfs_filblks_t resaligned;
 	struct xfs_bmbt_irec cmap;
@@ -1130,7 +1154,7 @@ xfs_atomic_write_cow_iomap_begin(
 		return -EAGAIN;
 
 	trace_xfs_iomap_atomic_write_cow(ip, offset, length);
-
+retry:
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 	if (!ip->i_cowfp) {
@@ -1141,14 +1165,22 @@ xfs_atomic_write_cow_iomap_begin(
 	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
 		cmap.br_startoff = end_fsb;
 	if (cmap.br_startoff <= offset_fsb) {
+		if (isnullstartblock(cmap.br_startblock))
+			goto convert_delay;
+
+		/*
+		 * cmap could extend outside the write range due to previous
+		 * speculative preallocations. We must trim cmap to the write
+		 * range because the cow fork treats written mappings to mean
+		 * "write in progress".
+		 */
 		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
 		goto found;
 	}
 
-	end_fsb = cmap.br_startoff;
-	count_fsb = end_fsb - offset_fsb;
+	hole_count_fsb = cmap.br_startoff - offset_fsb;
 
-	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
+	resaligned = xfs_aligned_fsb_count(offset_fsb, hole_count_fsb,
 			xfs_get_cowextsz_hint(ip));
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -1169,8 +1201,10 @@ xfs_atomic_write_cow_iomap_begin(
 	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
 		cmap.br_startoff = end_fsb;
 	if (cmap.br_startoff <= offset_fsb) {
-		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
 		xfs_trans_cancel(tp);
+		if (isnullstartblock(cmap.br_startblock))
+			goto convert_delay;
+		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
 		goto found;
 	}
 
@@ -1182,7 +1216,7 @@ xfs_atomic_write_cow_iomap_begin(
 	 * atomic writes to that same range will be aligned (and don't require
 	 * this COW-based method).
 	 */
-	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+	error = xfs_bmapi_write(tp, ip, offset_fsb, hole_count_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
 			XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);
 	if (error) {
@@ -1195,21 +1229,43 @@ xfs_atomic_write_cow_iomap_begin(
 	if (error)
 		goto out_unlock;
 
+	/*
+	 * cmap could map more blocks than the range we passed into bmapi_write
+	 * because of EXTSZALIGN or adjacent pre-existing unwritten mappings
+	 * that were merged. Trim cmap to the original write range so that we
+	 * don't convert more than we were asked to do for this write.
+	 */
+	xfs_trim_extent(&cmap, offset_fsb, count_fsb);
+
 found:
 	if (cmap.br_state != XFS_EXT_NORM) {
-		error = xfs_reflink_convert_cow_locked(ip, offset_fsb,
-				count_fsb);
+		error = xfs_reflink_convert_cow_locked(ip, cmap.br_startoff,
+				cmap.br_blockcount);
 		if (error)
 			goto out_unlock;
 		cmap.br_state = XFS_EXT_NORM;
+		xfs_check_atomic_cow_conversion(ip, offset_fsb, count_fsb,
+				&cmap);
 	}
 
-	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
-	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+	trace_xfs_iomap_found(ip, offset, length, XFS_COW_FORK, &cmap);
 	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
 
+convert_delay:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmapi_convert_delalloc(ip, XFS_COW_FORK, offset, iomap,
+			NULL);
+	if (error)
+		return error;
+
+	/*
+	 * Try the lookup again, because the delalloc conversion might have
+	 * turned the COW mapping into unwritten, but we need it to be in
+	 * written state.
+	 */
+	goto retry;
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 23cdab4515bb..ef7a931ebde5 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -246,6 +246,14 @@ xfs_zoned_map_extent(
 	 * If a data write raced with this GC write, keep the existing data in
 	 * the data fork, mark our newly written GC extent as reclaimable, then
 	 * move on to the next extent.
+	 *
+	 * Note that this can also happen when racing with operations that do
+	 * not actually invalidate the data, but just move it to a different
+	 * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
+	 * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
+	 * data was just moved around, GC fails to free the zone, but the zone
+	 * becomes a GC candidate again as soon as all previous GC I/O has
+	 * finished and these blocks will be moved out eventually.
 	 */
 	if (old_startblock != NULLFSBLOCK &&
 	    old_startblock != data.br_startblock)
@@ -607,7 +615,7 @@ xfs_select_open_zone_mru(
 	lockdep_assert_held(&zi->zi_open_zones_lock);
 
 	list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry)
-		if (xfs_try_use_zone(zi, file_hint, oz, false))
+		if (xfs_try_use_zone(zi, file_hint, oz, XFS_ZONE_ALLOC_OK))
 			return oz;
 
 	cond_resched_lock(&zi->zi_open_zones_lock);
@@ -1241,8 +1249,10 @@ xfs_mount_zones(
 
 		while ((rtg = xfs_rtgroup_next(mp, rtg))) {
 			error = xfs_init_zone(&iz, rtg, NULL);
-			if (error)
+			if (error) {
+				xfs_rtgroup_rele(rtg);
 				goto out_free_zone_info;
+			}
 		}
 	}
 
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 109877d9a6bf..4ade54445532 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
 	/* Open Zone being written to */
 	struct xfs_open_zone *oz;
 
+	struct xfs_rtgroup *victim_rtg;
+
 	/* Bio used for reads and writes, including the bvec used by it */
 	struct bio_vec bv;
 	struct bio bio; /* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
 	iter->rec_count = 0;
 	iter->rec_idx = 0;
 	iter->victim_rtg = victim_rtg;
+	atomic_inc(&victim_rtg->rtg_gccount);
 }
 
 /*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
 	return 0;
 
 done:
+	atomic_dec(&iter->victim_rtg->rtg_gccount);
 	xfs_rtgroup_rele(iter->victim_rtg);
 	iter->victim_rtg = NULL;
 	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
 		if (!rtg)
 			continue;
 
+		/*
+		 * If the zone is already undergoing GC, don't pick it again.
+		 *
+		 * This prevents us from picking one of the zones for which we
+		 * already submitted GC I/O, but for which the remapping hasn't
+		 * concluded yet. This won't cause data corruption, but
+		 * increases write amplification and slows down GC, so this is
+		 * a bad thing.
+		 */
+		if (atomic_read(&rtg->rtg_gccount)) {
+			xfs_rtgroup_rele(rtg);
+			continue;
+		}
+
 		/* skip zones that are just waiting for a reset */
 		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
 		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
 	chunk->scratch = &data->scratch[data->scratch_idx];
 	chunk->data = data;
 	chunk->oz = oz;
+	chunk->victim_rtg = iter->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
 
 	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
 	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
 xfs_zone_gc_free_chunk(
 	struct xfs_gc_bio *chunk)
 {
+	atomic_dec(&chunk->victim_rtg->rtg_gccount);
+	xfs_rtgroup_rele(chunk->victim_rtg);
 	list_del(&chunk->entry);
 	xfs_open_zone_put(chunk->oz);
 	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
 	split_chunk->oz = chunk->oz;
 	atomic_inc(&chunk->oz->oz_ref);
 
+	split_chunk->victim_rtg = chunk->victim_rtg;
+	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+	atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
 	chunk->offset += split_len;
 	chunk->len -= split_len;
 	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);
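The xfs_zone_gc.c hunks above follow a simple pattern: each realtime group carries an atomic count of in-flight GC operations (rtg_gccount), incremented when GC I/O is queued against the group and decremented when it completes, and the victim picker skips any group whose count is non-zero. Below is a minimal userspace sketch of that pattern, not XFS code; struct zone_group, pick_victim() and the block counts are simplified stand-ins for illustration only.

/*
 * Minimal userspace sketch (not XFS code) of a refcount-guarded GC victim
 * selection: each group keeps an atomic count of in-flight GC I/O, and the
 * picker skips any group whose count is non-zero.
 */
#include <stdatomic.h>
#include <stdio.h>

struct zone_group {
	unsigned int	used_blocks;	/* blocks still referenced */
	atomic_uint	gccount;	/* outstanding GC operations */
};

/* Pick the group with the fewest used blocks that is not already under GC. */
static struct zone_group *pick_victim(struct zone_group *groups, int ngroups)
{
	struct zone_group *victim = NULL;
	unsigned int victim_used = (unsigned int)-1;

	for (int i = 0; i < ngroups; i++) {
		struct zone_group *zg = &groups[i];

		/* mirrors the rtg_gccount check added to xfs_zone_gc_pick_victim_from() */
		if (atomic_load(&zg->gccount))
			continue;
		/* skip empty groups and anything no better than the current victim */
		if (zg->used_blocks == 0 || zg->used_blocks >= victim_used)
			continue;
		victim = zg;
		victim_used = zg->used_blocks;
	}
	return victim;
}

int main(void)
{
	struct zone_group groups[3] = {
		{ .used_blocks = 10 },
		{ .used_blocks = 5 },
		{ .used_blocks = 20 },
	};

	/* Pretend GC I/O is already in flight against the emptiest group. */
	atomic_fetch_add(&groups[1].gccount, 1);

	struct zone_group *victim = pick_victim(groups, 3);
	printf("victim index: %td\n", victim ? victim - groups : -1);

	/* GC on group 1 finishes; it becomes a candidate again. */
	atomic_fetch_sub(&groups[1].gccount, 1);
	victim = pick_victim(groups, 3);
	printf("victim index: %td\n", victim ? victim - groups : -1);
	return 0;
}

Running the sketch picks group 0 while group 1 still has GC in flight, then group 1 once its counter drops back to zero, mirroring how a zone only becomes a GC candidate again after all of its outstanding GC I/O has finished.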
