summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_trans_resv.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_trans_resv.c')
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c343
1 files changed, 312 insertions, 31 deletions
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 13d00c7166e1..86a111d0f2fc 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -22,6 +22,12 @@
#include "xfs_rtbitmap.h"
#include "xfs_attr_item.h"
#include "xfs_log.h"
+#include "xfs_defer.h"
+#include "xfs_bmap_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_rmap_item.h"
+#include "xfs_refcount_item.h"
+#include "xfs_trace.h"
#define _ALLOC true
#define _FREE false
@@ -264,6 +270,42 @@ xfs_rtalloc_block_count(
*/
/*
+ * Finishing a data device refcount updates (t1):
+ * the agfs of the ags containing the blocks: nr_ops * sector size
+ * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_reflink(mp))
+ return 0;
+
+ return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Realtime refcount updates (t2);
+ * the rt refcount inode
+ * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_cui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr_ops)
+{
+ if (!xfs_has_rtreflink(mp))
+ return 0;
+
+ return xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* Compute the log reservation required to handle the refcount update
* transaction. Refcount updates are always done via deferred log items.
*
@@ -280,19 +322,10 @@ xfs_calc_refcountbt_reservation(
struct xfs_mount *mp,
unsigned int nr_ops)
{
- unsigned int blksz = XFS_FSB_TO_B(mp, 1);
- unsigned int t1, t2 = 0;
+ unsigned int t1, t2;
- if (!xfs_has_reflink(mp))
- return 0;
-
- t1 = xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
-
- if (xfs_has_realtime(mp))
- t2 = xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
- blksz);
+ t1 = xfs_calc_finish_cui_reservation(mp, nr_ops);
+ t2 = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);
return max(t1, t2);
}
@@ -380,6 +413,96 @@ xfs_calc_write_reservation_minlogsize(
}
/*
+ * Finishing an EFI can free the blocks and bmap blocks (t2):
+ * the agf for each of the ags: nr * sector size
+ * the agfl for each of the ags: nr * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming nr extents:
+ * nr exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap:
+ * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_rt_efi_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_realtime(mp))
+ return 0;
+
+ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
+ mp->m_sb.sb_blocksize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
+ mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rmapbt(mp))
+ return 0;
+ return xfs_calc_finish_efi_reservation(mp, nr);
+}
+
+/*
+ * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
+ * on each end of the record, and that can cause the AGFL to be refilled or
+ * emptied out.
+ */
+inline unsigned int
+xfs_calc_finish_rt_rui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ if (!xfs_has_rtrmapbt(mp))
+ return 0;
+ return xfs_calc_finish_rt_efi_reservation(mp, nr);
+}
+
+/*
+ * In finishing a BUI, we can modify:
+ * the inode being truncated: inode size
+ * dquots
+ * the inode's bmap btree: (max depth + 1) * block size
+ */
+inline unsigned int
+xfs_calc_finish_bui_reservation(
+ struct xfs_mount *mp,
+ unsigned int nr)
+{
+ return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+ mp->m_sb.sb_blocksize);
+}
+
+/*
* In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
@@ -411,16 +534,8 @@ xfs_calc_itruncate_reservation(
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
- t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
-
- if (xfs_has_realtime(mp)) {
- t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
- } else {
- t3 = 0;
- }
+ t2 = xfs_calc_finish_efi_reservation(mp, 4);
+ t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);
/*
* In the early days of reflink, we included enough reservation to log
@@ -501,9 +616,7 @@ xfs_calc_rename_reservation(
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 3);
if (xfs_has_parent(mp)) {
unsigned int rename_overhead, exchange_overhead;
@@ -611,9 +724,7 @@ xfs_calc_link_reservation(
overhead += xfs_calc_iunlink_remove_reservation(mp);
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 1);
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrsetm.tr_logres;
@@ -676,9 +787,7 @@ xfs_calc_remove_reservation(
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
- t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
- XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_finish_efi_reservation(mp, 2);
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrrm.tr_logres;
@@ -1181,6 +1290,15 @@ xfs_calc_namespace_reservations(
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}
+STATIC void
+xfs_calc_default_atomic_ioend_reservation(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* Pick a default that will scale reasonably for the log size. */
+ resp->tr_atomic_ioend = resp->tr_itruncate;
+}
+
void
xfs_trans_resv_calc(
struct xfs_mount *mp,
@@ -1275,4 +1393,167 @@ xfs_trans_resv_calc(
resp->tr_itruncate.tr_logcount += logcount_adj;
resp->tr_write.tr_logcount += logcount_adj;
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
+
+ /*
+ * Now that we've finished computing the static reservations, we can
+ * compute the dynamic reservation for atomic writes.
+ */
+ xfs_calc_default_atomic_ioend_reservation(mp, resp);
+}
+
+/*
+ * Return the per-extent and fixed transaction reservation sizes needed to
+ * complete an atomic write.
+ */
+STATIC unsigned int
+xfs_calc_atomic_write_ioend_geometry(
+ struct xfs_mount *mp,
+ unsigned int *step_size)
+{
+ const unsigned int efi = xfs_efi_log_space(1);
+ const unsigned int efd = xfs_efd_log_space(1);
+ const unsigned int rui = xfs_rui_log_space(1);
+ const unsigned int rud = xfs_rud_log_space();
+ const unsigned int cui = xfs_cui_log_space(1);
+ const unsigned int cud = xfs_cud_log_space();
+ const unsigned int bui = xfs_bui_log_space(1);
+ const unsigned int bud = xfs_bud_log_space();
+
+ /*
+ * Maximum overhead to complete an atomic write ioend in software:
+ * remove data fork extent + remove cow fork extent + map extent into
+ * data fork.
+ *
+ * tx0: Creates a BUI and a CUI and that's all it needs.
+ *
+ * tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and
+ * enough space to relog the CUI (== CUI + CUD).
+ *
+ * tx2: Roll again to finish the RUI. Need space for the RUD and space
+ * to relog the CUI.
+ *
+ * tx3: Roll again, need space for the CUD and possibly a new EFI.
+ *
+ * tx4: Roll again, need space for an EFD.
+ *
+ * If the extent referenced by the pair of BUI/CUI items is not the one
+ * being currently processed, then we need to reserve space to relog
+ * both items.
+ */
+ const unsigned int tx0 = bui + cui;
+ const unsigned int tx1 = bud + rui + cui + cud;
+ const unsigned int tx2 = rud + cui + cud;
+ const unsigned int tx3 = cud + efi;
+ const unsigned int tx4 = efd;
+ const unsigned int relog = bui + bud + cui + cud;
+
+ const unsigned int per_intent = max(max3(tx0, tx1, tx2),
+ max3(tx3, tx4, relog));
+
+ /* Overhead to finish one step of each intent item type */
+ const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
+ const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
+ const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
+ const unsigned int f4 = xfs_calc_finish_bui_reservation(mp, 1);
+
+ /* We only finish one item per transaction in a chain */
+ *step_size = max(f4, max3(f1, f2, f3));
+
+ return per_intent;
+}
+
+/*
+ * Compute the maximum size (in fsblocks) of atomic writes that we can complete
+ * given the existing log reservations.
+ */
+xfs_extlen_t
+xfs_calc_max_atomic_write_fsblocks(
+ struct xfs_mount *mp)
+{
+ const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
+ unsigned int per_intent = 0;
+ unsigned int step_size = 0;
+ unsigned int ret = 0;
+
+ if (resv->tr_logres > 0) {
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
+ &step_size);
+
+ if (resv->tr_logres >= step_size)
+ ret = (resv->tr_logres - step_size) / per_intent;
+ }
+
+ trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
+ resv->tr_logres, ret);
+
+ return ret;
+}
+
+/*
+ * Compute the log blocks and transaction reservation needed to complete an
+ * atomic write of a given number of blocks. Worst case, each block requires
+ * separate handling. A return value of 0 means something went wrong.
+ */
+xfs_extlen_t
+xfs_calc_atomic_write_log_geometry(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount,
+ unsigned int *new_logres)
+{
+ struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
+ uint old_logres = curr_res->tr_logres;
+ unsigned int per_intent, step_size;
+ unsigned int logres;
+ xfs_extlen_t min_logblocks;
+
+ ASSERT(blockcount > 0);
+
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
+
+ /* Check for overflows */
+ if (check_mul_overflow(blockcount, per_intent, &logres) ||
+ check_add_overflow(logres, step_size, &logres))
+ return 0;
+
+ curr_res->tr_logres = logres;
+ min_logblocks = xfs_log_calc_minimum_size(mp);
+ curr_res->tr_logres = old_logres;
+
+ trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
+ blockcount, min_logblocks, logres);
+
+ *new_logres = logres;
+ return min_logblocks;
+}
+
+/*
+ * Compute the transaction reservation needed to complete an out of place
+ * atomic write of a given number of blocks.
+ */
+int
+xfs_calc_atomic_write_reservation(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount)
+{
+ unsigned int new_logres;
+ xfs_extlen_t min_logblocks;
+
+ /*
+ * If the caller doesn't ask for a specific atomic write size, then
+ * use the defaults.
+ */
+ if (blockcount == 0) {
+ xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
+ return 0;
+ }
+
+ min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
+ &new_logres);
+ if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
+ return -EINVAL;
+
+ M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
+ return 0;
}