From 93b8a5854f247138e401471a9c3b82ccb62ff608 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Dec 2011 21:58:07 +0000 Subject: xfs: remove the deprecated nodelaylog option The delaylog mode has been the default for a long time, and the nodelaylog option has been scheduled for removal in Linux 3.3. Remove it and code only used by it now that we have opened the 3.3 window. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3eca58f51ae9..0e76348d958a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -199,7 +199,6 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_flags |= XFS_MOUNT_DELAYLOG; /* * These can be overridden by the mount option parsing. @@ -353,11 +352,11 @@ xfs_parseargs( mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { - mp->m_flags |= XFS_MOUNT_DELAYLOG; + xfs_warn(mp, + "delaylog is the default now, option is deprecated."); } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { - mp->m_flags &= ~XFS_MOUNT_DELAYLOG; xfs_warn(mp, - "nodelaylog is deprecated and will be removed in Linux 3.3"); + "nodelaylog support has been removed, option is deprecated."); } else if (!strcmp(this_char, MNTOPT_DISCARD)) { mp->m_flags |= XFS_MOUNT_DISCARD; } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { @@ -395,13 +394,6 @@ xfs_parseargs( return EINVAL; } - if ((mp->m_flags & XFS_MOUNT_DISCARD) && - !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { - xfs_warn(mp, - "the discard option is incompatible with the nodelaylog option"); - return EINVAL; - } - #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { xfs_warn(mp, "quota support not available in this kernel."); @@ -501,7 +493,6 @@ xfs_showargs( { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, - { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, { 0, NULL } }; -- cgit From 34625c661b01dab193c7e8a0151a63553e97cfdf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Dec 2011 21:58:12 +0000 Subject: xfs: remove xfs_qm_sync Now that we can't have any dirty dquots around that aren't in the AIL we can get rid of the explicit dquot syncing from xfssyncd and xfs_fs_sync_fs and instead rely on AIL pushing to write out any quota updates. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0e76348d958a..88cd0c893163 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1025,17 +1025,10 @@ xfs_fs_sync_fs( int error; /* - * Not much we can do for the first async pass. Writing out the - * superblock would be counter-productive as we are going to redirty - * when writing out other data and metadata (and writing out a single - * block is quite fast anyway). - * - * Try to asynchronously kick off quota syncing at least. + * Doing anything during the async pass would be counterproductive. */ - if (!wait) { - xfs_qm_sync(mp, SYNC_TRYLOCK); + if (!wait) return 0; - } error = xfs_quiesce_data(mp); if (error) -- cgit From 40d344ec5ee440596b1f3ae87556e20c7197757a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Dec 2011 08:53:21 +0000 Subject: xfs: mark the xfssyncd workqueue as non-reentrant On a system with lots of memory pressure that is stuck on synchronous inode reclaim the workqueue code will run one instance of the inode reclaim work item on every CPU. which is not what we want. Make sure to mark the xfssyncd workqueue as non-reentrant to make sure there only is one instace of each running globally. Also stop using special paramater for the workqueue; now that we guarantee each fs has only running one of each works at a time there is no need to artificially lower max_active and compensate for that by setting the WQ_CPU_INTENSIVE flag. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 88cd0c893163..5f955f42377e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1625,12 +1625,12 @@ STATIC int __init xfs_init_workqueues(void) { /* - * max_active is set to 8 to give enough concurency to allow - * multiple work operations on each CPU to run. This allows multiple - * filesystems to be running sync work concurrently, and scales with - * the number of CPUs in the system. + * We never want to the same work item to run twice, reclaiming inodes + * or idling the log is not going to get any faster by multiple CPUs + * competing for ressources. Use the default large max_active value + * so that even lots of filesystems can perform these task in parallel. */ - xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); if (!xfs_syncd_wq) return -ENOMEM; return 0; -- cgit From 0b8fd3033c308e4088760aa1d38ce77197b4e074 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 15:49:55 +0000 Subject: xfs: log the inode in ->write_inode calls for kupdate If the writeback code writes back an inode because it has expired we currently use the non-blockin ->write_inode path. This means any inode that is pinned is skipped. With delayed logging and a workload that has very little log traffic otherwise it is very likely that an inode that gets constantly written to is always pinned, and thus we keep refusing to write it. The VM writeback code at that point redirties it and doesn't try to write it again for another 30 seconds. This means under certain scenarious time based metadata writeback never happens. Fix this by calling into xfs_log_inode for kupdate in addition to data integrity syncs, and thus transfer the inode to the log ASAP. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Tested-by: Mark Tinguely Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3eca58f51ae9..1add17ca3350 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -905,7 +905,7 @@ xfs_fs_write_inode( if (!ip->i_update_core) return 0; - if (wbc->sync_mode == WB_SYNC_ALL) { + if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) { /* * Make sure the inode has made it it into the log. Instead * of forcing it all the way to stable storage using a -- cgit From be4f1ac828776bbc7868a68b465cd8eedb733cfd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Dec 2011 20:08:41 +0000 Subject: xfs: log all dirty inodes in xfs_fs_sync_fs Since Linux 2.6.36 the writeback code has introduces various measures for live lock prevention during sync(). Unfortunately some of these are actively harmful for the XFS model, where the inode gets marked dirty for metadata from the data I/O handler. The older_than_this checks that are now more strictly enforced since writeback: avoid livelocking WB_SYNC_ALL writeback by only calling into __writeback_inodes_sb and thus only sampling the current cut off time once. But on a slow enough devices the previous asynchronous sync pass might not have fully completed yet, and thus XFS might mark metadata dirty only after that sampling of the cut off time for the blocking pass already happened. I have not myself reproduced this myself on a real system, but by introducing artificial delay into the XFS I/O completion workqueues it can be reproduced easily. Fix this by iterating over all XFS inodes in ->sync_fs and log all that are dirty. This might log inode that only got redirtied after the previous pass, but given how cheap delayed logging of inodes is it isn't a major concern for performance. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Tested-by: Mark Tinguely Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1add17ca3350..8a899496fd5f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -868,27 +868,6 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -STATIC int -xfs_log_inode( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - return xfs_trans_commit(tp, 0); -} - STATIC int xfs_fs_write_inode( struct inode *inode, @@ -902,8 +881,6 @@ xfs_fs_write_inode( if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); - if (!ip->i_update_core) - return 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) { /* @@ -913,11 +890,14 @@ xfs_fs_write_inode( * ->sync_fs call do that for thus, which reduces the number * of synchronous log forces dramatically. */ - error = xfs_log_inode(ip); + error = xfs_log_dirty_inode(ip, NULL, 0); if (error) goto out; return 0; } else { + if (!ip->i_update_core) + return 0; + /* * We make this non-blocking if the inode is contended, return * EAGAIN to indicate to the caller that they did not succeed. -- cgit From 34c80b1d93e6e20ca9dea0baf583a5b5510d92d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Dec 2011 21:32:45 -0500 Subject: vfs: switch ->show_options() to struct dentry * Signed-off-by: Al Viro --- fs/xfs/xfs_super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8a899496fd5f..7b7669507ee3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1238,9 +1238,9 @@ xfs_fs_unfreeze( STATIC int xfs_fs_show_options( struct seq_file *m, - struct vfsmount *mnt) + struct dentry *root) { - return -xfs_showargs(XFS_M(mnt->mnt_sb), m); + return -xfs_showargs(XFS_M(root->d_sb), m); } /* -- cgit From 474fce067521a40dbacc722e8ba119e81c2d31bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:09 +0000 Subject: xfs: replace i_flock with a sleeping bitlock We almost never block on i_flock, the exception is synchronous inode flushing. Instead of bloating the inode with a 16/24-byte completion that we abuse as a semaphore just implement it as a bitlock that uses a bit waitqueue for the rare sleeping path. This primarily is a tradeoff between a much smaller inode and a faster non-blocking path vs faster wakeups, and we are much better off with the former. A small downside is that we will lose lockdep checking for i_flock, but given that it's always taken inside the ilock that should be acceptable. Note that for example the inode writeback locking is implemented in a very similar way. Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 281961c1d81a..6851fa7b1afa 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -829,13 +829,6 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); init_waitqueue_head(&ip->i_ipin_wait); - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&ip->i_flush); - complete(&ip->i_flush); mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); -- cgit From f392e6319a4e9a028b0c8b48f000bb01d660ad53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:10 +0000 Subject: xfs: replace i_pin_wait with a bit waitqueue Replace i_pin_wait, which is only used during synchronous inode flushing with a bit waitqueue. This trades off a much smaller inode against slightly slower wakeup performance, and saves 12 (32-bit) or 20 (64-bit) bytes in the XFS inode. Reviewed-by: Alex Elder Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_super.c') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6851fa7b1afa..ee5b695c99a7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -828,7 +828,6 @@ xfs_fs_inode_init_once( /* xfs inode */ atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); - init_waitqueue_head(&ip->i_ipin_wait); mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); -- cgit