summary | refs | log | tree | commit | diff
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/commit.c 56
-rw-r--r-- fs/jbd2/journal.c 76
-rw-r--r-- fs/jbd2/revoke.c 2
-rw-r--r-- fs/jbd2/transaction.c 45
4 files changed, 129 insertions, 50 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f3ad1598b201..7f21cf3aaf92 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
int ret;
struct timespec now = current_kernel_time();
+ *cbh = NULL;
+
if (is_journal_aborted(journal))
return 0;
@@ -137,9 +139,9 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
- ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh);
+ ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
else
- ret = submit_bh(WRITE_SYNC_PLUG, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
*cbh = bh;
return ret;
@@ -217,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
- commit_transaction->t_flushed_data_blocks = 1;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -329,19 +330,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int tag_bytes = journal_tag_bytes(journal);
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
- int write_op = WRITE_SYNC;
+ struct blk_plug plug;
/*
* First job: lock down the current transaction and wait for
* all outstanding updates to complete.
*/
-#ifdef COMMIT_STATS
- spin_lock(&journal->j_list_lock);
- summarise_journal_usage(journal);
- spin_unlock(&journal->j_list_lock);
-#endif
-
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
@@ -363,13 +358,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
- /*
- * Use plugged writes here, since we want to submit several before
- * we unplug the device. We don't do explicit unplugging in here,
- * instead we rely on sync_buffer() doing the unplug for us.
- */
- if (commit_transaction->t_synchronous_commit)
- write_op = WRITE_SYNC_PLUG;
trace_jbd2_commit_locking(journal, commit_transaction);
stats.run.rs_wait = commit_transaction->t_max_wait;
stats.run.rs_locked = jiffies;
@@ -410,7 +398,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* we do not require it to remember exactly which old buffers it
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
- * buffer are perfectly permissable.
+ * buffer are perfectly permissible.
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
@@ -469,8 +457,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
jbd2_journal_abort(journal, err);
+ blk_start_plug(&plug);
jbd2_journal_write_revoke_records(journal, commit_transaction,
- write_op);
+ WRITE_SYNC);
+ blk_finish_plug(&plug);
jbd_debug(3, "JBD: commit phase 2\n");
@@ -497,6 +487,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
err = 0;
descriptor = NULL;
bufs = 0;
+ blk_start_plug(&plug);
while (commit_transaction->t_buffers) {
/* Find the next buffer to be journaled... */
@@ -658,7 +649,7 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(write_op, bh);
+ submit_bh(WRITE_SYNC, bh);
}
cond_resched();
stats.run.rs_blocks_logged += bufs;
@@ -680,12 +671,16 @@ start_journal_io:
err = 0;
}
+ write_lock(&journal->j_state_lock);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ commit_transaction->t_state = T_COMMIT_DFLUSH;
+ write_unlock(&journal->j_state_lock);
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
* the commit record
*/
- if (commit_transaction->t_flushed_data_blocks &&
+ if (commit_transaction->t_need_data_flush &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
@@ -699,6 +694,8 @@ start_journal_io:
__jbd2_journal_abort_hard(journal);
}
+ blk_finish_plug(&plug);
+
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the
@@ -760,8 +757,13 @@ wait_for_iobuf:
required. */
JBUFFER_TRACE(jh, "file as BJ_Forget");
jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
- /* Wake up any transactions which were waiting for this
- IO to complete */
+ /*
+ * Wake up any transactions which were waiting for this IO to
+ * complete. The barrier must be here so that changes by
+ * jbd2_journal_file_buffer() take effect before wake_up_bit()
+ * does the waitqueue check.
+ */
+ smp_mb();
wake_up_bit(&bh->b_state, BH_Unshadow);
JBUFFER_TRACE(jh, "brelse shadowed buffer");
__brelse(bh);
@@ -800,6 +802,10 @@ wait_for_iobuf:
jbd2_journal_abort(journal, err);
jbd_debug(3, "JBD: commit phase 5\n");
+ write_lock(&journal->j_state_lock);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
+ commit_transaction->t_state = T_COMMIT_JFLUSH;
+ write_unlock(&journal->j_state_lock);
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -808,7 +814,7 @@ wait_for_iobuf:
if (err)
__jbd2_journal_abort_hard(journal);
}
- if (!err && !is_journal_aborted(journal))
+ if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
@@ -955,7 +961,7 @@ restart_loop:
jbd_debug(3, "JBD: commit phase 7\n");
- J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
commit_transaction->t_start = jiffies;
stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..9a7826990304 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,14 +473,18 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction commit was started.
+ * Called with j_state_lock locked for writing.
+ * Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
/*
- * Are we already doing a recent enough commit?
+ * The only transaction we can possibly wait upon is the
+ * currently running transaction (if it exists). Otherwise,
+ * the target tid must be an old one.
*/
- if (!tid_geq(journal->j_commit_request, target)) {
+ if (journal->j_running_transaction &&
+ journal->j_running_transaction->t_tid == target) {
/*
* We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
@@ -492,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
return 1;
- }
+ } else if (!tid_geq(journal->j_commit_request, target))
+ /* This should never happen, but if it does, preserve
+ the evidence before kjournald goes into a loop and
+ increments j_commit_sequence beyond all recognition. */
+ WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+ journal->j_commit_request,
+ journal->j_commit_sequence,
+ target, journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -520,11 +532,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
{
transaction_t *transaction = NULL;
tid_t tid;
+ int need_to_start = 0;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ if (!tid_geq(journal->j_commit_request, transaction->t_tid))
+ need_to_start = 1;
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;
@@ -535,6 +549,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
tid = transaction->t_tid;
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
return 1;
}
@@ -572,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
}
/*
+ * Return 1 if a given transaction has not yet sent barrier request
+ * connected with a transaction commit. If 0 is returned, transaction
+ * may or may not have sent the barrier. Used to avoid sending barrier
+ * twice in common cases.
+ */
+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
+{
+ int ret = 0;
+ transaction_t *commit_trans;
+
+ if (!(journal->j_flags & JBD2_BARRIER))
+ return 0;
+ read_lock(&journal->j_state_lock);
+ /* Transaction already committed? */
+ if (tid_geq(journal->j_commit_sequence, tid))
+ goto out;
+ commit_trans = journal->j_committing_transaction;
+ if (!commit_trans || commit_trans->t_tid != tid) {
+ ret = 1;
+ goto out;
+ }
+ /*
+ * Transaction is being committed and we already proceeded to
+ * submitting a flush to fs partition?
+ */
+ if (journal->j_fs_dev != journal->j_dev) {
+ if (!commit_trans->t_need_data_flush ||
+ commit_trans->t_state >= T_COMMIT_DFLUSH)
+ goto out;
+ } else {
+ if (commit_trans->t_state >= T_COMMIT_JFLUSH)
+ goto out;
+ }
+ ret = 1;
+out:
+ read_unlock(&journal->j_state_lock);
+ return ret;
+}
+EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
+
+/*
* Wait for a specified commit to complete.
* The caller may not hold the journal lock.
*/
@@ -912,7 +969,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+ printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
goto out_err;
}
@@ -978,7 +1035,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+ printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
goto out_err;
}
@@ -986,7 +1043,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
err = jbd2_journal_bmap(journal, 0, &blocknr);
/* If that failed, give up */
if (err) {
- printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
+ printk(KERN_ERR "%s: Cannot locate journal superblock\n",
__func__);
goto out_err;
}
@@ -2408,10 +2465,12 @@ const char *jbd2_dev_to_name(dev_t device)
new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
if (!new_dev)
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+ bd = bdget(device);
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
kfree(new_dev);
+ bdput(bd);
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
@@ -2420,7 +2479,6 @@ const char *jbd2_dev_to_name(dev_t device)
}
devcache[i] = new_dev;
devcache[i]->device = device;
- bd = bdget(device);
if (bd) {
bdevname(bd, devcache[i]->devname);
bdput(bd);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9ad321fd63fd..69fd93588118 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -71,7 +71,7 @@
* switching hash tables under them. For operations on the lists of entries in
* the hash table j_revoke_lock is used.
*
- * Finally, also replay code uses the hash tables but at this moment noone else
+ * Finally, also replay code uses the hash tables but at this moment no one else
* can touch them (filesystem isn't mounted yet) and hence no locking is
* needed.
*/
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..3eec82d32fd4 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
*/
/*
- * Update transiaction's maximum wait time, if debugging is enabled.
+ * Update transaction's maximum wait time, if debugging is enabled.
*
* In order for t_max_wait to be reliable, it must be protected by a
* lock. But doing so will mean that start_this_handle() can not be
@@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
* means that maximum wait time reported by the jbd2_run_stats
* tracepoint will always be zero.
*/
-static inline void update_t_max_wait(transaction_t *transaction)
+static inline void update_t_max_wait(transaction_t *transaction,
+ unsigned long ts)
{
#ifdef CONFIG_JBD2_DEBUG
- unsigned long ts = jiffies;
-
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start);
@@ -117,10 +116,11 @@ static inline void update_t_max_wait(transaction_t *transaction)
static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask)
{
- transaction_t *transaction;
- int needed;
- int nblocks = handle->h_buffer_credits;
- transaction_t *new_transaction = NULL;
+ transaction_t *transaction, *new_transaction = NULL;
+ tid_t tid;
+ int needed, need_to_start;
+ int nblocks = handle->h_buffer_credits;
+ unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
@@ -268,7 +271,7 @@ repeat:
/* OK, account for the buffers that this operation expects to
* use and add the handle to the running transaction.
*/
- update_t_max_wait(transaction);
+ update_t_max_wait(transaction, ts);
handle->h_transaction = transaction;
atomic_inc(&transaction->t_updates);
atomic_inc(&transaction->t_handle_count);
@@ -313,7 +316,8 @@ static handle_t *new_handle(int nblocks)
* This function is visible to journal users (like ext3fs), so is not
* called with the journal already locked.
*
- * Return a pointer to a newly allocated handle, or NULL on failure
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
*/
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
{
@@ -442,7 +446,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
- int ret;
+ tid_t tid;
+ int need_to_start, ret;
/* If we've had an abort of any type, don't even think about
* actually doing the restart! */
@@ -465,8 +470,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
@@ -914,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
*/
JBUFFER_TRACE(jh, "cancelling revoke");
jbd2_journal_cancel_revoke(handle, jh);
- jbd2_journal_put_journal_head(jh);
out:
+ jbd2_journal_put_journal_head(jh);
return err;
}
@@ -1396,7 +1404,7 @@ int jbd2_journal_stop(handle_t *handle)
/*
* Once we drop t_updates, if it goes to zero the transaction
- * could start commiting on us and eventually disappear. So
+ * could start committing on us and eventually disappear. So
* once we do this, we must not dereference transaction
* pointer again.
*/
@@ -2140,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
jinode->i_next_transaction == transaction)
goto done;
+ /*
+ * We only ever set this variable to 1 so the test is safe. Since
+ * t_need_data_flush is likely to be set, we do the test to save some
+ * cacheline bouncing
+ */
+ if (!transaction->t_need_data_flush)
+ transaction->t_need_data_flush = 1;
/* On some different transaction's list - should be
* the committing one */
if (jinode->i_transaction) {