From 6b520e0565422966cdf1c3759bd73df77b0f248c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Dec 2011 15:51:45 -0500 Subject: vfs: fix the stupidity with i_dentry in inode destructors Seeing that just about every destructor got that INIT_LIST_HEAD() copied into it, there is no point whatsoever keeping this INIT_LIST_HEAD in inode_init_once(); the cost of taking it into inode_init_always() will be negligible for pipes and sockets and negative for everything else. Not to mention the removal of boilerplate code from ->destroy_inode() instances... Signed-off-by: Al Viro --- fs/xfs/xfs_iget.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_iget.c') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0fa98b1c70ea..3960a066d7ff 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -107,7 +107,6 @@ xfs_inode_free_callback( struct inode *inode = container_of(head, struct inode, i_rcu); struct xfs_inode *ip = XFS_I(inode); - INIT_LIST_HEAD(&inode->i_dentry); kmem_zone_free(xfs_inode_zone, ip); } -- cgit From 8096b1ebb59b94b3bc6abb6b7d121419e83447ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:07 +0000 Subject: xfs: remove the if_ext_max field in struct xfs_ifork We spent a lot of effort to maintain this field, but it always equals to the fork size divided by the constant size of an extent. The prime use of it is to assert that the two stay in sync. Just divide the fork size by the extent size in the few places that we actually use it and remove the overhead of maintaining it. Also introduce a few helpers to consolidate the places where we actually care about the value. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_iget.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/xfs/xfs_iget.c') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3960a066d7ff..f180ce896cd7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -450,8 +450,6 @@ again: *ipp = ip; - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. -- cgit From 474fce067521a40dbacc722e8ba119e81c2d31bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:09 +0000 Subject: xfs: replace i_flock with a sleeping bitlock We almost never block on i_flock, the exception is synchronous inode flushing. Instead of bloating the inode with a 16/24-byte completion that we abuse as a semaphore just implement it as a bitlock that uses a bit waitqueue for the rare sleeping path. This primarily is a tradeoff between a much smaller inode and a faster non-blocking path vs faster wakeups, and we are much better off with the former. A small downside is that we will lose lockdep checking for i_flock, but given that it's always taken inside the ilock that should be acceptable. Note that for example the inode writeback locking is implemented in a very similar way. Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Signed-off-by: Ben Myers --- fs/xfs/xfs_iget.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_iget.c') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f180ce896cd7..a7cf7139f9ad 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -77,7 +77,7 @@ xfs_inode_alloc( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); + ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); @@ -150,7 +150,7 @@ xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); + ASSERT(!xfs_isiflocked(ip)); /* * Because we use RCU freeing we need to ensure the inode always @@ -713,3 +713,19 @@ xfs_isilocked( return 0; } #endif + +void +__xfs_iflock( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); + + do { + prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + if (xfs_isiflocked(ip)) + io_schedule(); + } while (!xfs_iflock_nowait(ip)); + + finish_wait(wq, &wait.wait); +} -- cgit From ce7ae151ddada3dbf67301464343c154903166b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:11 +0000 Subject: xfs: remove the i_size field in struct xfs_inode There is no fundamental need to keep an in-memory inode size copy in the XFS inode. We already have the on-disk value in the dinode, and the separate in-memory copy that we need for regular files only in the XFS inode. Remove the xfs_inode i_size field and change the XFS_ISIZE macro to use the VFS inode i_size field for regular files. Switch code that was directly accessing the i_size field in the xfs_inode to XFS_ISIZE, or in cases where we are limited to regular files direct access of the VFS inode i_size field. This also allows dropping some fairly complicated code in the write path which dealt with keeping the xfs_inode i_size uptodate with the VFS i_size that is getting updated inside ->write_end. Note that we do not bother resetting the VFS i_size when truncating a file that gets freed to zero as there is no point in doing so because the VFS inode is no longer in use at this point. Just relax the assert in xfs_ifree to only check the on-disk size instead. Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_iget.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_iget.c') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a7cf7139f9ad..3b5b78aa3b87 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -94,7 +94,6 @@ xfs_inode_alloc( ip->i_update_core = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); - ip->i_size = 0; ip->i_new_size = 0; return ip; -- cgit From 2813d682e8e6a278f94817429afd46b30875bb6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Dec 2011 20:00:12 +0000 Subject: xfs: remove the i_new_size field in struct xfs_inode Now that we use the VFS i_size field throughout XFS there is no need for the i_new_size field any more given that the VFS i_size field gets updated in ->write_end before unlocking the page, and thus is always uptodate when writeback could see a page. Removing i_new_size also has the advantage that we will never have to trim back di_size during a failed buffered write, given that it never gets updated past i_size. Note that currently the generic direct I/O code only updates i_size after calling our end_io handler, which requires a small workaround to make sure di_size actually makes it to disk. I hope to fix this properly in the generic code. A downside is that we lose the support for parallel non-overlapping O_DIRECT appending writes that recently was added. I don't think keeping the complex and fragile i_new_size infrastructure for this is a good tradeoff - if we really care about parallel appending writers we should investigate turning the iolock into a range lock, which would also allow for parallel non-overlapping buffered writers. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_iget.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_iget.c') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3b5b78aa3b87..8c3e46394d48 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -94,7 +94,6 @@ xfs_inode_alloc( ip->i_update_core = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); - ip->i_new_size = 0; return ip; } -- cgit