From eaae668d01e15435cf977cced3975ccc436257fc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 12:48:09 +0000 Subject: fs/inode: Fix kernel-doc format for inode_init_owner Signed-off-by: Ben Hutchings Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 9910c039f026..16fefd373fc2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1715,7 +1715,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) EXPORT_SYMBOL(init_special_inode); /** - * Init uid,gid,mode for new inode according to posix standards + * inode_init_owner - Init uid,gid,mode for new inode according to posix standards * @inode: New inode * @dir: Directory inode * @mode: mode of the new inode -- cgit From e795b71799ff0b27365020c9ddaa25d0d83f99c8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:25 -0700 Subject: userns: userns: check user namespace for task->file uid equivalence checks Cheat for now and say all files belong to init_user_ns. Next step will be to let superblocks belong to a user_ns, and derive inode_userns(inode) from inode->i_sb->s_user_ns. Finally we'll introduce more flexible arrangements. Changelog: Feb 15: make is_owner_or_cap take const struct inode Feb 23: make is_owner_or_cap bool [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 16fefd373fc2..a21d5a938a17 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -1733,3 +1734,19 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); + +/* + * return true if current either has CAP_FOWNER to the + * file, or owns the file. + */ +bool is_owner_or_cap(const struct inode *inode) +{ + struct user_namespace *ns = inode_userns(inode); + + if (current_user_ns() == ns && current_fsuid() == inode->i_uid) + return true; + if (ns_capable(ns, CAP_FOWNER)) + return true; + return false; +} +EXPORT_SYMBOL(is_owner_or_cap); -- cgit From 2e1496707560ecf98e9b0604622c0990f94861d3 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:26 -0700 Subject: userns: rename is_owner_or_cap to inode_owner_or_capable And give it a kernel-doc comment. [akpm@linux-foundation.org: btrfs changed in linux-next] Signed-off-by: Serge E. Hallyn Cc: "Eric W. Biederman" Cc: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index a21d5a938a17..0b3da4a77704 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1735,11 +1735,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, } EXPORT_SYMBOL(inode_init_owner); -/* - * return true if current either has CAP_FOWNER to the - * file, or owns the file. +/** + * inode_owner_or_capable - check current task permissions to inode + * @inode: inode being checked + * + * Return true if current either has CAP_FOWNER to the inode, or + * owns the file. */ -bool is_owner_or_cap(const struct inode *inode) +bool inode_owner_or_capable(const struct inode *inode) { struct user_namespace *ns = inode_userns(inode); @@ -1749,4 +1752,4 @@ bool is_owner_or_cap(const struct inode *inode) return true; return false; } -EXPORT_SYMBOL(is_owner_or_cap); +EXPORT_SYMBOL(inode_owner_or_capable); -- cgit From 250df6ed274d767da844a5d9f05720b804240197 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:36 +1100 Subject: fs: protect inode->i_state with inode->i_lock Protect inode state transitions and validity checks with the inode->i_lock. This enables us to make inode state transitions independently of the inode_lock and is the first step to peeling away the inode_lock from the code. This requires that __iget() is done atomically with i_state checks during list traversals so that we don't race with another thread marking the inode I_FREEING between the state check and grabbing the reference. Also remove the unlock_new_inode() memory barrier optimisation required to avoid taking the inode_lock when clearing I_NEW. Simplify the code by simply taking the inode->i_lock around the state change and wakeup. Because the wakeup is no longer tricky, remove the wake_up_inode() function and open code the wakeup where necessary. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 150 ++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 46 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 0b3da4a77704..14b12c4ee026 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -27,6 +27,17 @@ #include #include +/* + * inode locking rules. + * + * inode->i_lock protects: + * inode->i_state, inode->i_hash, __iget() + * + * Lock ordering: + * inode_lock + * inode->i_lock + */ + /* * This is needed for the following functions: * - inode_has_buffers @@ -137,15 +148,6 @@ int proc_nr_inodes(ctl_table *table, int write, } #endif -static void wake_up_inode(struct inode *inode) -{ - /* - * Prevent speculative execution through spin_unlock(&inode_lock); - */ - smp_mb(); - wake_up_bit(&inode->i_state, __I_NEW); -} - /** * inode_init_always - perform inode structure intialisation * @sb: superblock inode belongs to @@ -336,7 +338,7 @@ static void init_once(void *foo) } /* - * inode_lock must be held + * inode->i_lock must be held */ void __iget(struct inode *inode) { @@ -413,7 +415,9 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_lock); + spin_lock(&inode->i_lock); hlist_add_head(&inode->i_hash, b); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); } EXPORT_SYMBOL(__insert_inode_hash); @@ -438,7 +442,9 @@ static void __remove_inode_hash(struct inode *inode) void remove_inode_hash(struct inode *inode) { spin_lock(&inode_lock); + spin_lock(&inode->i_lock); hlist_del_init(&inode->i_hash); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); } EXPORT_SYMBOL(remove_inode_hash); @@ -495,7 +501,9 @@ static void dispose_list(struct list_head *head) __inode_sb_list_del(inode); spin_unlock(&inode_lock); - wake_up_inode(inode); + spin_lock(&inode->i_lock); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); destroy_inode(inode); } } @@ -518,10 +526,17 @@ void evict_inodes(struct super_block *sb) list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) continue; - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) + + spin_lock(&inode->i_lock); + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + spin_unlock(&inode->i_lock); continue; + } inode->i_state |= I_FREEING; + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + inodes_stat.nr_unused--; + spin_unlock(&inode->i_lock); /* * Move the inode off the IO lists and LRU once I_FREEING is @@ -529,8 +544,6 @@ void evict_inodes(struct super_block *sb) */ list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -563,18 +576,26 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) spin_lock(&inode_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) + spin_lock(&inode->i_lock); + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + spin_unlock(&inode->i_lock); continue; + } if (inode->i_state & I_DIRTY && !kill_dirty) { + spin_unlock(&inode->i_lock); busy = 1; continue; } if (atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); busy = 1; continue; } inode->i_state |= I_FREEING; + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + inodes_stat.nr_unused--; + spin_unlock(&inode->i_lock); /* * Move the inode off the IO lists and LRU once I_FREEING is @@ -582,8 +603,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) */ list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -641,8 +660,10 @@ static void prune_icache(int nr_to_scan) * Referenced or dirty inodes are still in use. Give them * another pass through the LRU as we canot reclaim them now. */ + spin_lock(&inode->i_lock); if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { + spin_unlock(&inode->i_lock); list_del_init(&inode->i_lru); inodes_stat.nr_unused--; continue; @@ -650,12 +671,14 @@ static void prune_icache(int nr_to_scan) /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { - list_move(&inode->i_lru, &inode_lru); inode->i_state &= ~I_REFERENCED; + spin_unlock(&inode->i_lock); + list_move(&inode->i_lru, &inode_lru); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, @@ -666,11 +689,15 @@ static void prune_icache(int nr_to_scan) if (inode != list_entry(inode_lru.next, struct inode, i_lru)) continue; /* wrong inode or list_empty */ - if (!can_unuse(inode)) + spin_lock(&inode->i_lock); + if (!can_unuse(inode)) { + spin_unlock(&inode->i_lock); continue; + } } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + spin_unlock(&inode->i_lock); /* * Move the inode off the IO lists and LRU once I_FREEING is @@ -737,11 +764,13 @@ repeat: continue; if (!test(inode, data)) continue; + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } __iget(inode); + spin_unlock(&inode->i_lock); return inode; } return NULL; @@ -763,11 +792,13 @@ repeat: continue; if (inode->i_sb != sb) continue; + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } __iget(inode); + spin_unlock(&inode->i_lock); return inode; } return NULL; @@ -832,14 +863,23 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - __inode_sb_list_add(inode); + spin_lock(&inode->i_lock); inode->i_state = 0; + spin_unlock(&inode->i_lock); + __inode_sb_list_add(inode); spin_unlock(&inode_lock); } return inode; } EXPORT_SYMBOL(new_inode); +/** + * unlock_new_inode - clear the I_NEW state and wake up any waiters + * @inode: new inode to unlock + * + * Called when the inode is fully initialised to clear the new state of the + * inode and wake up anyone waiting for the inode to finish initialisation. + */ void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -859,19 +899,11 @@ void unlock_new_inode(struct inode *inode) } } #endif - /* - * This is special! We do not need the spinlock when clearing I_NEW, - * because we're guaranteed that nobody else tries to do anything about - * the state of the inode when it is locked, as we just created it (so - * there can be no old holders that haven't tested I_NEW). - * However we must emit the memory barrier so that other CPUs reliably - * see the clearing of I_NEW after the other inode initialisation has - * completed. - */ - smp_mb(); + spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; - wake_up_inode(inode); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); @@ -900,9 +932,11 @@ static struct inode *get_new_inode(struct super_block *sb, if (set(inode, data)) goto set_failed; + spin_lock(&inode->i_lock); + inode->i_state = I_NEW; hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); __inode_sb_list_add(inode); - inode->i_state = I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the @@ -947,9 +981,11 @@ static struct inode *get_new_inode_fast(struct super_block *sb, old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; + spin_lock(&inode->i_lock); + inode->i_state = I_NEW; hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); __inode_sb_list_add(inode); - inode->i_state = I_NEW; spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the @@ -1034,15 +1070,19 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) + spin_lock(&inode->i_lock); + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); - else + spin_unlock(&inode->i_lock); + } else { + spin_unlock(&inode->i_lock); /* * Handle the case where s_op->clear_inode is not been * called yet, and somebody is calling igrab * while the inode is getting freed. */ inode = NULL; + } spin_unlock(&inode_lock); return inode; } @@ -1271,7 +1311,6 @@ int insert_inode_locked(struct inode *inode) ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - inode->i_state |= I_NEW; while (1) { struct hlist_node *node; struct inode *old = NULL; @@ -1281,16 +1320,23 @@ int insert_inode_locked(struct inode *inode) continue; if (old->i_sb != sb) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + spin_lock(&old->i_lock); + if (old->i_state & (I_FREEING|I_WILL_FREE)) { + spin_unlock(&old->i_lock); continue; + } break; } if (likely(!node)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); return 0; } __iget(old); + spin_unlock(&old->i_lock); spin_unlock(&inode_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { @@ -1308,8 +1354,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - inode->i_state |= I_NEW; - while (1) { struct hlist_node *node; struct inode *old = NULL; @@ -1320,16 +1364,23 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, continue; if (!test(old, data)) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + spin_lock(&old->i_lock); + if (old->i_state & (I_FREEING|I_WILL_FREE)) { + spin_unlock(&old->i_lock); continue; + } break; } if (likely(!node)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); return 0; } __iget(old); + spin_unlock(&old->i_lock); spin_unlock(&inode_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { @@ -1375,6 +1426,9 @@ static void iput_final(struct inode *inode) const struct super_operations *op = inode->i_sb->s_op; int drop; + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_NEW); + if (op && op->drop_inode) drop = op->drop_inode(inode); else @@ -1386,21 +1440,23 @@ static void iput_final(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) { inode_lru_list_add(inode); } + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); return; } - WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); + spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; __remove_inode_hash(inode); } - WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + spin_unlock(&inode->i_lock); /* * Move the inode off the IO lists and LRU once I_FREEING is @@ -1413,8 +1469,10 @@ static void iput_final(struct inode *inode) spin_unlock(&inode_lock); evict(inode); remove_inode_hash(inode); - wake_up_inode(inode); + spin_lock(&inode->i_lock); + wake_up_bit(&inode->i_state, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); + spin_unlock(&inode->i_lock); destroy_inode(inode); } @@ -1611,9 +1669,8 @@ EXPORT_SYMBOL(inode_wait); * to recheck inode state. * * It doesn't matter if I_NEW is not set initially, a call to - * wake_up_inode() after removing from the hash list will DTRT. - * - * This is called with inode_lock held. + * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list + * will DTRT. */ static void __wait_on_freeing_inode(struct inode *inode) { @@ -1621,6 +1678,7 @@ static void __wait_on_freeing_inode(struct inode *inode) DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); schedule(); finish_wait(wq, &wait.wait); -- cgit From b2b2af8e614b4dcd8aca1369d82ce5ad0461a7b1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:37 +1100 Subject: fs: factor inode disposal We have a couple of places that dispose of inodes. factor the disposal into evict() to isolate this code and make it simpler to peel away the inode_lock from the code. While doing this, change the logic flow in iput_final() to separate the different cases that need to be handled to make the transitions the inode goes through more obvious. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 104 ++++++++++++++++++++++++------------------------------------- 1 file changed, 41 insertions(+), 63 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 14b12c4ee026..f752a959254b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -422,17 +422,6 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) } EXPORT_SYMBOL(__insert_inode_hash); -/** - * __remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. - */ -static void __remove_inode_hash(struct inode *inode) -{ - hlist_del_init(&inode->i_hash); -} - /** * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash @@ -462,10 +451,31 @@ void end_writeback(struct inode *inode) } EXPORT_SYMBOL(end_writeback); +/* + * Free the inode passed in, removing it from the lists it is still connected + * to. We remove any pages still attached to the inode and wait for any IO that + * is still in progress before finally destroying the inode. + * + * An inode must already be marked I_FREEING so that we avoid the inode being + * moved back onto lists if we race with other code that manipulates the lists + * (e.g. writeback_single_inode). The caller is responsible for setting this. + * + * An inode must already be removed from the LRU list before being evicted from + * the cache. This should occur atomically with setting the I_FREEING state + * flag, so no inodes here should ever be on the LRU when being evicted. + */ static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; + BUG_ON(!(inode->i_state & I_FREEING)); + BUG_ON(!list_empty(&inode->i_lru)); + + spin_lock(&inode_lock); + list_del_init(&inode->i_wb_list); + __inode_sb_list_del(inode); + spin_unlock(&inode_lock); + if (op->evict_inode) { op->evict_inode(inode); } else { @@ -477,6 +487,15 @@ static void evict(struct inode *inode) bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); + + remove_inode_hash(inode); + + spin_lock(&inode->i_lock); + wake_up_bit(&inode->i_state, __I_NEW); + BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); + spin_unlock(&inode->i_lock); + + destroy_inode(inode); } /* @@ -495,16 +514,6 @@ static void dispose_list(struct list_head *head) list_del_init(&inode->i_lru); evict(inode); - - spin_lock(&inode_lock); - __remove_inode_hash(inode); - __inode_sb_list_del(inode); - spin_unlock(&inode_lock); - - spin_lock(&inode->i_lock); - wake_up_bit(&inode->i_state, __I_NEW); - spin_unlock(&inode->i_lock); - destroy_inode(inode); } } @@ -537,13 +546,7 @@ void evict_inodes(struct super_block *sb) if (!(inode->i_state & (I_DIRTY | I_SYNC))) inodes_stat.nr_unused--; spin_unlock(&inode->i_lock); - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ list_move(&inode->i_lru, &dispose); - list_del_init(&inode->i_wb_list); } spin_unlock(&inode_lock); @@ -596,13 +599,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) if (!(inode->i_state & (I_DIRTY | I_SYNC))) inodes_stat.nr_unused--; spin_unlock(&inode->i_lock); - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ list_move(&inode->i_lru, &dispose); - list_del_init(&inode->i_wb_list); } spin_unlock(&inode_lock); @@ -699,12 +696,7 @@ static void prune_icache(int nr_to_scan) inode->i_state |= I_FREEING; spin_unlock(&inode->i_lock); - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ list_move(&inode->i_lru, &freeable); - list_del_init(&inode->i_wb_list); inodes_stat.nr_unused--; } if (current_is_kswapd()) @@ -1434,16 +1426,16 @@ static void iput_final(struct inode *inode) else drop = generic_drop_inode(inode); + if (!drop && (sb->s_flags & MS_ACTIVE)) { + inode->i_state |= I_REFERENCED; + if (!(inode->i_state & (I_DIRTY|I_SYNC))) + inode_lru_list_add(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); + return; + } + if (!drop) { - if (sb->s_flags & MS_ACTIVE) { - inode->i_state |= I_REFERENCED; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) { - inode_lru_list_add(inode); - } - spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); - return; - } inode->i_state |= I_WILL_FREE; spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); @@ -1452,28 +1444,14 @@ static void iput_final(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - __remove_inode_hash(inode); } inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); - - /* - * Move the inode off the IO lists and LRU once I_FREEING is - * set so that it won't get moved back on there if it is dirty. - */ inode_lru_list_del(inode); - list_del_init(&inode->i_wb_list); - - __inode_sb_list_del(inode); + spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); + evict(inode); - remove_inode_hash(inode); - spin_lock(&inode->i_lock); - wake_up_bit(&inode->i_state, __I_NEW); - BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); - spin_unlock(&inode->i_lock); - destroy_inode(inode); } /** -- cgit From 02afc410f363f98ac4f186341e38dcec13fc0e60 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:38 +1100 Subject: fs: Lock the inode LRU list separately Introduce the inode_lru_lock to protect the inode_lru list. This lock is nested inside the inode->i_lock to allow the inode to be added to the LRU list in iput_final without needing to deal with lock inversions. This keeps iput_final() clean and neat. Further, where marking the inode I_FREEING and removing it from the LRU, move the LRU list manipulation within the inode->i_lock to keep the list manipulation consistent with iput_final. This also means that most of the open coded LRU list removal + unused inode accounting can now use the inode_lru_list_del() wrappers which cleans the code up further. However, this locking change means what the LRU traversal in prune_icache() inverts this lock ordering and needs to use trylock semantics on the inode->i_lock to avoid deadlocking. In these cases, if we fail to lock the inode we move it to the back of the LRU to prevent spinning on it. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f752a959254b..b19cb6ee6ca3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -32,10 +32,13 @@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() + * inode_lru_lock protects: + * inode_lru, inode->i_lru * * Lock ordering: * inode_lock * inode->i_lock + * inode_lru_lock */ /* @@ -85,6 +88,7 @@ static unsigned int i_hash_shift __read_mostly; */ static LIST_HEAD(inode_lru); +static DEFINE_SPINLOCK(inode_lru_lock); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -356,18 +360,22 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { + spin_lock(&inode_lru_lock); if (list_empty(&inode->i_lru)) { list_add(&inode->i_lru, &inode_lru); inodes_stat.nr_unused++; } + spin_unlock(&inode_lru_lock); } static void inode_lru_list_del(struct inode *inode) { + spin_lock(&inode_lru_lock); if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); inodes_stat.nr_unused--; } + spin_unlock(&inode_lru_lock); } static inline void __inode_sb_list_add(struct inode *inode) @@ -543,10 +551,9 @@ void evict_inodes(struct super_block *sb) } inode->i_state |= I_FREEING; - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); - list_move(&inode->i_lru, &dispose); + list_add(&inode->i_lru, &dispose); } spin_unlock(&inode_lock); @@ -596,10 +603,9 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) } inode->i_state |= I_FREEING; - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - inodes_stat.nr_unused--; + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); - list_move(&inode->i_lru, &dispose); + list_add(&inode->i_lru, &dispose); } spin_unlock(&inode_lock); @@ -623,7 +629,7 @@ static int can_unuse(struct inode *inode) /* * Scan `goal' inodes on the unused list for freeable ones. They are moved to a - * temporary list and then are freed outside inode_lock by dispose_list(). + * temporary list and then are freed outside inode_lru_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. If the inode has metadata buffers attached to @@ -645,6 +651,7 @@ static void prune_icache(int nr_to_scan) down_read(&iprune_sem); spin_lock(&inode_lock); + spin_lock(&inode_lru_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -653,11 +660,20 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_lru.prev, struct inode, i_lru); + /* + * we are inverting the inode_lru_lock/inode->i_lock here, + * so use a trylock. If we fail to get the lock, just move the + * inode to the back of the list so we don't spin on it. + */ + if (!spin_trylock(&inode->i_lock)) { + list_move(&inode->i_lru, &inode_lru); + continue; + } + /* * Referenced or dirty inodes are still in use. Give them * another pass through the LRU as we canot reclaim them now. */ - spin_lock(&inode->i_lock); if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { spin_unlock(&inode->i_lock); @@ -676,17 +692,21 @@ static void prune_icache(int nr_to_scan) if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode->i_lock); + spin_unlock(&inode_lru_lock); spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); spin_lock(&inode_lock); + spin_lock(&inode_lru_lock); if (inode != list_entry(inode_lru.next, struct inode, i_lru)) continue; /* wrong inode or list_empty */ - spin_lock(&inode->i_lock); + /* avoid lock inversions with trylock */ + if (!spin_trylock(&inode->i_lock)) + continue; if (!can_unuse(inode)) { spin_unlock(&inode->i_lock); continue; @@ -703,6 +723,7 @@ static void prune_icache(int nr_to_scan) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); + spin_unlock(&inode_lru_lock); spin_unlock(&inode_lock); dispose_list(&freeable); -- cgit From f283c86afe6aa70b733d1ecebad5d9464943b774 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:39 +1100 Subject: fs: remove inode_lock from iput_final and prune_icache Now that inode state changes are protected by the inode->i_lock and the inode LRU manipulations by the inode_lru_lock, we can remove the inode_lock from prune_icache and the initial part of iput_final(). instead of using the inode_lock to protect the inode during iput_final, use the inode->i_lock instead. This protects the inode against new references being taken while we change the inode state to I_FREEING, as well as preventing prune_icache from grabbing the inode while we are manipulating it. Hence we no longer need the inode_lock in iput_final prior to setting I_FREEING on the inode. For prune_icache, we no longer need the inode_lock to protect the LRU list, and the inodes themselves are protected against freeing races by the inode->i_lock. Hence we can lift the inode_lock from prune_icache as well. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index b19cb6ee6ca3..389f5a247599 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -650,7 +650,6 @@ static void prune_icache(int nr_to_scan) unsigned long reap = 0; down_read(&iprune_sem); - spin_lock(&inode_lock); spin_lock(&inode_lru_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -676,8 +675,8 @@ static void prune_icache(int nr_to_scan) */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { - spin_unlock(&inode->i_lock); list_del_init(&inode->i_lru); + spin_unlock(&inode->i_lock); inodes_stat.nr_unused--; continue; } @@ -685,20 +684,18 @@ static void prune_icache(int nr_to_scan) /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; - spin_unlock(&inode->i_lock); list_move(&inode->i_lru, &inode_lru); + spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&inode_lru_lock); - spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lock); spin_lock(&inode_lru_lock); if (inode != list_entry(inode_lru.next, @@ -724,7 +721,6 @@ static void prune_icache(int nr_to_scan) else __count_vm_events(PGINODESTEAL, reap); spin_unlock(&inode_lru_lock); - spin_unlock(&inode_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -1082,7 +1078,6 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { - spin_lock(&inode_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); @@ -1096,7 +1091,6 @@ struct inode *igrab(struct inode *inode) */ inode = NULL; } - spin_unlock(&inode_lock); return inode; } EXPORT_SYMBOL(igrab); @@ -1439,7 +1433,6 @@ static void iput_final(struct inode *inode) const struct super_operations *op = inode->i_sb->s_op; int drop; - spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); if (op && op->drop_inode) @@ -1452,16 +1445,13 @@ static void iput_final(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) inode_lru_list_add(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); return; } if (!drop) { inode->i_state |= I_WILL_FREE; spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); write_inode_now(inode, 1); - spin_lock(&inode_lock); spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; @@ -1470,7 +1460,6 @@ static void iput_final(struct inode *inode) inode->i_state |= I_FREEING; inode_lru_list_del(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); evict(inode); } @@ -1489,7 +1478,7 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state & I_CLEAR); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) iput_final(inode); } } -- cgit From 55fa6091d83160ca772fc37cebae45d42695a708 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:40 +1100 Subject: fs: move i_sb_list out from under inode_lock Protect the per-sb inode list with a new global lock inode_sb_list_lock and use it to protect the list manipulations and traversals. This lock replaces the inode_lock as the inodes on the list can be validity checked while holding the inode->i_lock and hence the inode_lock is no longer needed to protect the list. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 389f5a247599..785b1ab23ff0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -34,10 +34,15 @@ * inode->i_state, inode->i_hash, __iget() * inode_lru_lock protects: * inode_lru, inode->i_lru + * inode_sb_list_lock protects: + * sb->s_inodes, inode->i_sb_list * * Lock ordering: * inode_lock * inode->i_lock + * + * inode_sb_list_lock + * inode->i_lock * inode_lru_lock */ @@ -99,6 +104,8 @@ static struct hlist_head *inode_hashtable __read_mostly; */ DEFINE_SPINLOCK(inode_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); + /* * iprune_sem provides exclusion between the icache shrinking and the * umount path. @@ -378,26 +385,23 @@ static void inode_lru_list_del(struct inode *inode) spin_unlock(&inode_lru_lock); } -static inline void __inode_sb_list_add(struct inode *inode) -{ - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); -} - /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add */ void inode_sb_list_add(struct inode *inode) { - spin_lock(&inode_lock); - __inode_sb_list_add(inode); - spin_unlock(&inode_lock); + spin_lock(&inode_sb_list_lock); + list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); + spin_unlock(&inode_sb_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); -static inline void __inode_sb_list_del(struct inode *inode) +static inline void inode_sb_list_del(struct inode *inode) { + spin_lock(&inode_sb_list_lock); list_del_init(&inode->i_sb_list); + spin_unlock(&inode_sb_list_lock); } static unsigned long hash(struct super_block *sb, unsigned long hashval) @@ -481,9 +485,10 @@ static void evict(struct inode *inode) spin_lock(&inode_lock); list_del_init(&inode->i_wb_list); - __inode_sb_list_del(inode); spin_unlock(&inode_lock); + inode_sb_list_del(inode); + if (op->evict_inode) { op->evict_inode(inode); } else { @@ -539,7 +544,7 @@ void evict_inodes(struct super_block *sb) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) continue; @@ -555,7 +560,7 @@ void evict_inodes(struct super_block *sb) spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); @@ -584,7 +589,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) struct inode *inode, *next; LIST_HEAD(dispose); - spin_lock(&inode_lock); + spin_lock(&inode_sb_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { @@ -607,7 +612,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); } - spin_unlock(&inode_lock); + spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); @@ -867,16 +872,14 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&inode_lock); + spin_lock_prefetch(&inode_sb_list_lock); inode = alloc_inode(sb); if (inode) { - spin_lock(&inode_lock); spin_lock(&inode->i_lock); inode->i_state = 0; spin_unlock(&inode->i_lock); - __inode_sb_list_add(inode); - spin_unlock(&inode_lock); + inode_sb_list_add(inode); } return inode; } @@ -945,7 +948,7 @@ static struct inode *get_new_inode(struct super_block *sb, inode->i_state = I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); - __inode_sb_list_add(inode); + inode_sb_list_add(inode); spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the @@ -994,7 +997,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, inode->i_state = I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); - __inode_sb_list_add(inode); + inode_sb_list_add(inode); spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the -- cgit From a66979abad090b2765a6c6790c9fdeab996833f2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:41 +1100 Subject: fs: move i_wb_list out from under inode_lock Protect the inode writeback list with a new global lock inode_wb_list_lock and use it to protect the list manipulations and traversals. This lock replaces the inode_lock as the inodes on the list can be validity checked while holding the inode->i_lock and hence the inode_lock is no longer needed to protect the list. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 785b1ab23ff0..239fdc08719e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include "internal.h" /* * inode locking rules. @@ -36,6 +37,8 @@ * inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list + * inode_wb_list_lock protects: + * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list * * Lock ordering: * inode_lock @@ -44,6 +47,9 @@ * inode_sb_list_lock * inode->i_lock * inode_lru_lock + * + * inode_wb_list_lock + * inode->i_lock */ /* @@ -105,6 +111,7 @@ static struct hlist_head *inode_hashtable __read_mostly; DEFINE_SPINLOCK(inode_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); /* * iprune_sem provides exclusion between the icache shrinking and the @@ -483,10 +490,7 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - spin_lock(&inode_lock); - list_del_init(&inode->i_wb_list); - spin_unlock(&inode_lock); - + inode_wb_list_del(inode); inode_sb_list_del(inode); if (op->evict_inode) { -- cgit From 67a23c494621ff1d5431c3bc320947865b224625 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 22 Mar 2011 22:23:42 +1100 Subject: fs: rename inode_lock to inode_hash_lock All that remains of the inode_lock is protecting the inode hash list manipulation and traversals. Rename the inode_lock to inode_hash_lock to reflect it's actual function. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 111 +++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 61 insertions(+), 50 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 239fdc08719e..f9ee4928358f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -39,10 +39,10 @@ * sb->s_inodes, inode->i_sb_list * inode_wb_list_lock protects: * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list + * inode_hash_lock protects: + * inode_hashtable, inode->i_hash * * Lock ordering: - * inode_lock - * inode->i_lock * * inode_sb_list_lock * inode->i_lock @@ -50,6 +50,13 @@ * * inode_wb_list_lock * inode->i_lock + * + * inode_hash_lock + * inode_sb_list_lock + * inode->i_lock + * + * iunique_lock + * inode_hash_lock */ /* @@ -85,6 +92,8 @@ static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; +static struct hlist_head *inode_hashtable __read_mostly; +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); /* * Each inode can be on two separate lists. One is @@ -100,15 +109,6 @@ static unsigned int i_hash_shift __read_mostly; static LIST_HEAD(inode_lru); static DEFINE_SPINLOCK(inode_lru_lock); -static struct hlist_head *inode_hashtable __read_mostly; - -/* - * A simple spinlock to protect the list manipulations. - * - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ -DEFINE_SPINLOCK(inode_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); @@ -433,11 +433,11 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_add_head(&inode->i_hash, b); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__insert_inode_hash); @@ -449,11 +449,11 @@ EXPORT_SYMBOL(__insert_inode_hash); */ void remove_inode_hash(struct inode *inode) { - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_del_init(&inode->i_hash); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(remove_inode_hash); @@ -778,11 +778,15 @@ static struct inode *find_inode(struct super_block *sb, repeat: hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_sb != sb) + spin_lock(&inode->i_lock); + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; - if (!test(inode, data)) + } + if (!test(inode, data)) { + spin_unlock(&inode->i_lock); continue; - spin_lock(&inode->i_lock); + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -806,11 +810,15 @@ static struct inode *find_inode_fast(struct super_block *sb, repeat: hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_ino != ino) + spin_lock(&inode->i_lock); + if (inode->i_ino != ino) { + spin_unlock(&inode->i_lock); continue; - if (inode->i_sb != sb) + } + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; - spin_lock(&inode->i_lock); + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -924,7 +932,7 @@ void unlock_new_inode(struct inode *inode) EXPORT_SYMBOL(unlock_new_inode); /* - * This is called without the inode lock held.. Be careful. + * This is called without the inode hash lock held.. Be careful. * * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org @@ -941,7 +949,7 @@ static struct inode *get_new_inode(struct super_block *sb, if (inode) { struct inode *old; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode(sb, head, test, data); if (!old) { @@ -953,7 +961,7 @@ static struct inode *get_new_inode(struct super_block *sb, hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -966,7 +974,7 @@ static struct inode *get_new_inode(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -974,7 +982,7 @@ static struct inode *get_new_inode(struct super_block *sb, return inode; set_failed: - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); return NULL; } @@ -992,7 +1000,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, if (inode) { struct inode *old; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode_fast(sb, head, ino); if (!old) { @@ -1002,7 +1010,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -1015,7 +1023,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -1036,10 +1044,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino) struct hlist_node *node; struct inode *inode; + spin_lock(&inode_hash_lock); hlist_for_each_entry(inode, node, b, i_hash) { - if (inode->i_ino == ino && inode->i_sb == sb) + if (inode->i_ino == ino && inode->i_sb == sb) { + spin_unlock(&inode_hash_lock); return 0; + } } + spin_unlock(&inode_hash_lock); return 1; } @@ -1069,7 +1081,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) static unsigned int counter; ino_t res; - spin_lock(&inode_lock); spin_lock(&iunique_lock); do { if (counter <= max_reserved) @@ -1077,7 +1088,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) res = counter++; } while (!test_inode_iunique(sb, res)); spin_unlock(&iunique_lock); - spin_unlock(&inode_lock); return res; } @@ -1119,7 +1129,7 @@ EXPORT_SYMBOL(igrab); * * Otherwise NULL is returned. * - * Note, @test is called with the inode_lock held, so can't sleep. + * Note, @test is called with the inode_hash_lock held, so can't sleep. */ static struct inode *ifind(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), @@ -1127,15 +1137,15 @@ static struct inode *ifind(struct super_block *sb, { struct inode *inode; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); if (inode) { - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); if (likely(wait)) wait_on_inode(inode); return inode; } - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); return NULL; } @@ -1159,14 +1169,14 @@ static struct inode *ifind_fast(struct super_block *sb, { struct inode *inode; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); if (inode) { - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); wait_on_inode(inode); return inode; } - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); return NULL; } @@ -1189,7 +1199,7 @@ static struct inode *ifind_fast(struct super_block *sb, * * Otherwise NULL is returned. * - * Note, @test is called with the inode_lock held, so can't sleep. + * Note, @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) @@ -1217,7 +1227,7 @@ EXPORT_SYMBOL(ilookup5_nowait); * * Otherwise NULL is returned. * - * Note, @test is called with the inode_lock held, so can't sleep. + * Note, @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) @@ -1268,7 +1278,8 @@ EXPORT_SYMBOL(ilookup); * inode and this is returned locked, hashed, and with the I_NEW flag set. The * file system gets to fill it in before unlocking it via unlock_new_inode(). * - * Note both @test and @set are called with the inode_lock held, so can't sleep. + * Note both @test and @set are called with the inode_hash_lock held, so can't + * sleep. */ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), @@ -1328,7 +1339,7 @@ int insert_inode_locked(struct inode *inode) while (1) { struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); hlist_for_each_entry(old, node, head, i_hash) { if (old->i_ino != ino) continue; @@ -1346,12 +1357,12 @@ int insert_inode_locked(struct inode *inode) inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); return 0; } __iget(old); spin_unlock(&old->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); @@ -1372,7 +1383,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); hlist_for_each_entry(old, node, head, i_hash) { if (old->i_sb != sb) continue; @@ -1390,12 +1401,12 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); return 0; } __iget(old); spin_unlock(&old->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); @@ -1674,10 +1685,10 @@ static void __wait_on_freeing_inode(struct inode *inode) wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lock); + spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wait); - spin_lock(&inode_lock); + spin_lock(&inode_hash_lock); } static __initdata unsigned long ihash_entries; -- cgit From 0b2d0724e26a335cd326eb7ad552c109116a8795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Mar 2011 15:03:28 -0400 Subject: fs: simplify iget & friends Merge get_new_inode/get_new_inode_fast into iget5_locked/iget_locked as those were the only callers. Remove the internal ifind/ifind_fast helpers - ifind_fast only had a single caller, and ifind had two callers wanting it to do different things. Also clean up the comments in this area to focus on information important to a developer trying to use it, instead of overloading them with implementation details. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 262 ++++++++++++++++++++----------------------------------------- 1 file changed, 83 insertions(+), 179 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f9ee4928358f..05a1f75ae791 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -931,20 +931,42 @@ void unlock_new_inode(struct inode *inode) } EXPORT_SYMBOL(unlock_new_inode); -/* - * This is called without the inode hash lock held.. Be careful. +/** + * iget5_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @hashval: hash value (usually inode number) to get + * @test: callback used for comparisons between inodes + * @set: callback used to initialize a new struct inode + * @data: opaque data pointer to pass to @test and @set + * + * Search for the inode specified by @hashval and @data in the inode cache, + * and if present it is return it with an increased reference count. This is + * a generalized version of iget_locked() for file systems where the inode + * number is not sufficient for unique identification of an inode. + * + * If the inode is not in cache, allocate a new inode and return it locked, + * hashed, and with the I_NEW flag set. The file system gets to fill it in + * before unlocking it via unlock_new_inode(). * - * We no longer cache the sb_flags in i_flags - see fs.h - * -- rmk@arm.uk.linux.org + * Note both @test and @set are called with the inode_hash_lock held, so can't + * sleep. */ -static struct inode *get_new_inode(struct super_block *sb, - struct hlist_head *head, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), - void *data) +struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data) { + struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode(sb, head, test, data); + spin_unlock(&inode_hash_lock); + + if (inode) { + wait_on_inode(inode); + return inode; + } + inode = alloc_inode(sb); if (inode) { struct inode *old; @@ -986,16 +1008,34 @@ set_failed: destroy_inode(inode); return NULL; } +EXPORT_SYMBOL(iget5_locked); -/* - * get_new_inode_fast is the fast path version of get_new_inode, see the - * comment at iget_locked for details. +/** + * iget_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @ino: inode number to get + * + * Search for the inode specified by @ino in the inode cache and if present + * return it with an increased reference count. This is for file systems + * where the inode number is sufficient for unique identification of an inode. + * + * If the inode is not in cache, allocate a new inode and return it locked, + * hashed, and with the I_NEW flag set. The file system gets to fill it in + * before unlocking it via unlock_new_inode(). */ -static struct inode *get_new_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) +struct inode *iget_locked(struct super_block *sb, unsigned long ino) { + struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino); + spin_unlock(&inode_hash_lock); + if (inode) { + wait_on_inode(inode); + return inode; + } + inode = alloc_inode(sb); if (inode) { struct inode *old; @@ -1030,6 +1070,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, } return inode; } +EXPORT_SYMBOL(iget_locked); /* * search the inode cache for a matching inode number. @@ -1113,100 +1154,32 @@ struct inode *igrab(struct inode *inode) EXPORT_SYMBOL(igrab); /** - * ifind - internal function, you want ilookup5() or iget5(). + * ilookup5_nowait - search for an inode in the inode cache * @sb: super block of file system to search - * @head: the head of the list to search + * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test - * @wait: if true wait for the inode to be unlocked, if false do not - * - * ifind() searches for the inode specified by @data in the inode - * cache. This is a generalized version of ifind_fast() for file systems where - * the inode number is not sufficient for unique identification of an inode. * + * Search for the inode specified by @hashval and @data in the inode cache. * If the inode is in the cache, the inode is returned with an incremented * reference count. * - * Otherwise NULL is returned. + * Note: I_NEW is not waited upon so you have to be very careful what you do + * with the returned inode. You probably should be using ilookup5() instead. * - * Note, @test is called with the inode_hash_lock held, so can't sleep. + * Note: @test is called with the inode_hash_lock held, so can't sleep. */ -static struct inode *ifind(struct super_block *sb, - struct hlist_head *head, int (*test)(struct inode *, void *), - void *data, const int wait) +struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), void *data) { + struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); - if (inode) { - spin_unlock(&inode_hash_lock); - if (likely(wait)) - wait_on_inode(inode); - return inode; - } - spin_unlock(&inode_hash_lock); - return NULL; -} - -/** - * ifind_fast - internal function, you want ilookup() or iget(). - * @sb: super block of file system to search - * @head: head of the list to search - * @ino: inode number to search for - * - * ifind_fast() searches for the inode @ino in the inode cache. This is for - * file systems where the inode number is sufficient for unique identification - * of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. - * - * Otherwise NULL is returned. - */ -static struct inode *ifind_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) -{ - struct inode *inode; - - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); - if (inode) { - spin_unlock(&inode_hash_lock); - wait_on_inode(inode); - return inode; - } spin_unlock(&inode_hash_lock); - return NULL; -} - -/** - * ilookup5_nowait - search for an inode in the inode cache - * @sb: super block of file system to search - * @hashval: hash value (usually inode number) to search for - * @test: callback used for comparisons between inodes - * @data: opaque data pointer to pass to @test - * - * ilookup5() uses ifind() to search for the inode specified by @hashval and - * @data in the inode cache. This is a generalized version of ilookup() for - * file systems where the inode number is not sufficient for unique - * identification of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. Note, the inode lock is not waited upon so you have to be - * very careful what you do with the returned inode. You probably should be - * using ilookup5() instead. - * - * Otherwise NULL is returned. - * - * Note, @test is called with the inode_hash_lock held, so can't sleep. - */ -struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), void *data) -{ - struct hlist_head *head = inode_hashtable + hash(sb, hashval); - return ifind(sb, head, test, data, 0); + return inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1217,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait); * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * - * ilookup5() uses ifind() to search for the inode specified by @hashval and - * @data in the inode cache. This is a generalized version of ilookup() for - * file systems where the inode number is not sufficient for unique - * identification of an inode. - * - * If the inode is in the cache, the inode lock is waited upon and the inode is + * Search for the inode specified by @hashval and @data in the inode cache, + * and if the inode is in the cache, return the inode with an incremented + * reference count. Waits on I_NEW before returning the inode. * returned with an incremented reference count. * - * Otherwise NULL is returned. + * This is a generalized version of ilookup() for file systems where the + * inode number is not sufficient for unique identification of an inode. * - * Note, @test is called with the inode_hash_lock held, so can't sleep. + * Note: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode = ilookup5_nowait(sb, hashval, test, data); - return ifind(sb, head, test, data, 1); + if (inode) + wait_on_inode(inode); + return inode; } EXPORT_SYMBOL(ilookup5); @@ -1243,92 +1216,23 @@ EXPORT_SYMBOL(ilookup5); * @sb: super block of file system to search * @ino: inode number to search for * - * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. - * This is for file systems where the inode number is sufficient for unique - * identification of an inode. - * - * If the inode is in the cache, the inode is returned with an incremented - * reference count. - * - * Otherwise NULL is returned. + * Search for the inode @ino in the inode cache, and if the inode is in the + * cache, the inode is returned with an incremented reference count. */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); - - return ifind_fast(sb, head, ino); -} -EXPORT_SYMBOL(ilookup); - -/** - * iget5_locked - obtain an inode from a mounted file system - * @sb: super block of file system - * @hashval: hash value (usually inode number) to get - * @test: callback used for comparisons between inodes - * @set: callback used to initialize a new struct inode - * @data: opaque data pointer to pass to @test and @set - * - * iget5_locked() uses ifind() to search for the inode specified by @hashval - * and @data in the inode cache and if present it is returned with an increased - * reference count. This is a generalized version of iget_locked() for file - * systems where the inode number is not sufficient for unique identification - * of an inode. - * - * If the inode is not in cache, get_new_inode() is called to allocate a new - * inode and this is returned locked, hashed, and with the I_NEW flag set. The - * file system gets to fill it in before unlocking it via unlock_new_inode(). - * - * Note both @test and @set are called with the inode_hash_lock held, so can't - * sleep. - */ -struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, - int (*test)(struct inode *, void *), - int (*set)(struct inode *, void *), void *data) -{ - struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; - inode = ifind(sb, head, test, data, 1); - if (inode) - return inode; - /* - * get_new_inode() will do the right thing, re-trying the search - * in case it had to block at any point. - */ - return get_new_inode(sb, head, test, set, data); -} -EXPORT_SYMBOL(iget5_locked); - -/** - * iget_locked - obtain an inode from a mounted file system - * @sb: super block of file system - * @ino: inode number to get - * - * iget_locked() uses ifind_fast() to search for the inode specified by @ino in - * the inode cache and if present it is returned with an increased reference - * count. This is for file systems where the inode number is sufficient for - * unique identification of an inode. - * - * If the inode is not in cache, get_new_inode_fast() is called to allocate a - * new inode and this is returned locked, hashed, and with the I_NEW flag set. - * The file system gets to fill it in before unlocking it via - * unlock_new_inode(). - */ -struct inode *iget_locked(struct super_block *sb, unsigned long ino) -{ - struct hlist_head *head = inode_hashtable + hash(sb, ino); - struct inode *inode; + spin_lock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino); + spin_unlock(&inode_hash_lock); - inode = ifind_fast(sb, head, ino); if (inode) - return inode; - /* - * get_new_inode_fast() will do the right thing, re-trying the search - * in case it had to block at any point. - */ - return get_new_inode_fast(sb, head, ino); + wait_on_inode(inode); + return inode; } -EXPORT_SYMBOL(iget_locked); +EXPORT_SYMBOL(ilookup); int insert_inode_locked(struct inode *inode) { -- cgit From b6d0ad686da95fa85ce0c583ec35017bf1583563 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Mar 2011 13:27:47 -0700 Subject: fs: fix inode.c kernel-doc warning Fix inode.c kernel-doc fatal error: 2 comment sections have the same name: Error(fs/inode.c:1171): duplicate section name 'Note' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 05a1f75ae791..5f4e11aaeb5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1167,7 +1167,7 @@ EXPORT_SYMBOL(igrab); * Note: I_NEW is not waited upon so you have to be very careful what you do * with the returned inode. You probably should be using ilookup5() instead. * - * Note: @test is called with the inode_hash_lock held, so can't sleep. + * Note2: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) -- cgit