summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/fs.c10
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/fs-writeback.c45
-rw-r--r--fs/inode.c70
-rw-r--r--include/linux/fs.h56
-rw-r--r--include/linux/writeback.h3
6 files changed, 135 insertions, 56 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 94c392abef65..c0900c0c0f8a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1644,14 +1644,16 @@ again:
break;
}
} else if (clean_pass && this_pass_clean) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
mutex_unlock(&c->vfs_inodes_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
goto again;
}
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 1af75fa68638..894e38cdf4d0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
__d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1a5006329f6f..d8bec3c1bb1f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode)
{
+ assert_spin_locked(&inode->i_lock);
+
inode->i_state &= ~I_SYNC;
/* If inode is clean an unused, put it into LRU now... */
inode_add_lru(inode);
- /* Waiters must see I_SYNC cleared before being woken up */
- smp_mb();
- wake_up_bit(&inode->i_state, __I_SYNC);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_SYNC);
}
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
*/
void inode_wait_for_writeback(struct inode *inode)
{
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
- wait_queue_head_t *wqh;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ assert_spin_locked(&inode->i_lock);
+
+ if (!(inode->i_state & I_SYNC))
+ return;
- lockdep_assert_held(&inode->i_lock);
- wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- while (inode->i_state & I_SYNC) {
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ if (!(inode->i_state & I_SYNC))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait,
- TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
}
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
static void inode_sleep_on_writeback(struct inode *inode)
__releases(inode->i_lock)
{
- DEFINE_WAIT(wait);
- wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- int sleep;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+ bool sleep;
+
+ assert_spin_locked(&inode->i_lock);
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
- sleep = inode->i_state & I_SYNC;
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ sleep = !!(inode->i_state & I_SYNC);
spin_unlock(&inode->i_lock);
if (sleep)
schedule();
- finish_wait(wqh, &wait);
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
diff --git a/fs/inode.c b/fs/inode.c
index ba1645a09603..aacd05749c1f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED;
}
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit)
+{
+ void *bit_address;
+
+ bit_address = inode_state_wait_address(inode, bit);
+ init_wait_var_entry(wqe, bit_address, 0);
+ return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
inode->i_state &= ~I_LRU_ISOLATING;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_LRU_ISOLATING);
spin_unlock(&inode->i_lock);
}
static void inode_wait_for_lru_isolating(struct inode *inode)
{
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
lockdep_assert_held(&inode->i_lock);
- if (inode->i_state & I_LRU_ISOLATING) {
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
- wait_queue_head_t *wqh;
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ return;
- wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /*
+ * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+ * memory ordering.
+ */
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
/**
@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
* used as an indicator whether blocking on it is safe.
*/
spin_lock(&inode->i_lock);
- wake_up_bit(&inode->i_state, __I_NEW);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
}
@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
*/
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
/*
* Handle racing against evict(), see that routine for more details.
@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
return;
}
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cda1f2545ea0..a1ef8c65d828 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -681,7 +681,8 @@ struct inode {
#endif
/* Misc */
- unsigned long i_state;
+ u32 i_state;
+ /* 32-bit hole */
struct rw_semaphore i_rwsem;
unsigned long dirtied_when; /* jiffies of first dirtying */
@@ -744,6 +745,21 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+/*
+ * Get bit address from inode->i_state to use with wait_var_event()
+ * infrastructre.
+ */
+#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
+
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit);
+
+static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
+{
+ /* Caller is responsible for correct memory barriers. */
+ wake_up_var(inode_state_wait_address(inode, bit));
+}
+
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
static inline unsigned int i_blocksize(const struct inode *node)
@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
* i_count.
*
* Q: What is the difference between I_WILL_FREE and I_FREEING?
+ *
+ * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
+ * upon. There's one free address left.
*/
-#define I_DIRTY_SYNC (1 << 0)
-#define I_DIRTY_DATASYNC (1 << 1)
-#define I_DIRTY_PAGES (1 << 2)
-#define __I_NEW 3
+#define __I_NEW 0
#define I_NEW (1 << __I_NEW)
-#define I_WILL_FREE (1 << 4)
-#define I_FREEING (1 << 5)
-#define I_CLEAR (1 << 6)
-#define __I_SYNC 7
+#define __I_SYNC 1
#define I_SYNC (1 << __I_SYNC)
-#define I_REFERENCED (1 << 8)
+#define __I_LRU_ISOLATING 2
+#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+
+#define I_DIRTY_SYNC (1 << 3)
+#define I_DIRTY_DATASYNC (1 << 4)
+#define I_DIRTY_PAGES (1 << 5)
+#define I_WILL_FREE (1 << 6)
+#define I_FREEING (1 << 7)
+#define I_CLEAR (1 << 8)
+#define I_REFERENCED (1 << 9)
#define I_LINKABLE (1 << 10)
#define I_DIRTY_TIME (1 << 11)
-#define I_WB_SWITCH (1 << 13)
-#define I_OVL_INUSE (1 << 14)
-#define I_CREATING (1 << 15)
-#define I_DONTCACHE (1 << 16)
-#define I_SYNC_QUEUED (1 << 17)
-#define I_PINNING_NETFS_WB (1 << 18)
-#define __I_LRU_ISOLATING 19
-#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
+#define I_WB_SWITCH (1 << 12)
+#define I_OVL_INUSE (1 << 13)
+#define I_CREATING (1 << 14)
+#define I_DONTCACHE (1 << 15)
+#define I_SYNC_QUEUED (1 << 16)
+#define I_PINNING_NETFS_WB (1 << 17)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 56b85841ae4c..8f651bb0a1a5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
{
- wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
+ wait_var_event(inode_state_wait_address(inode, __I_NEW),
+ !(READ_ONCE(inode->i_state) & I_NEW));
}
#ifdef CONFIG_CGROUP_WRITEBACK