diff options
Diffstat (limited to 'fs/jbd2')
| -rw-r--r-- | fs/jbd2/commit.c | 56 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 76 | ||||
| -rw-r--r-- | fs/jbd2/revoke.c | 2 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 45 |
4 files changed, 129 insertions, 50 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f3ad1598b201..7f21cf3aaf92 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal, int ret; struct timespec now = current_kernel_time(); + *cbh = NULL; + if (is_journal_aborted(journal)) return 0; @@ -137,9 +139,9 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) - ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); + ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh); else - ret = submit_bh(WRITE_SYNC_PLUG, bh); + ret = submit_bh(WRITE_SYNC, bh); *cbh = bh; return ret; @@ -217,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - commit_transaction->t_flushed_data_blocks = 1; clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -329,19 +330,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; - int write_op = WRITE_SYNC; + struct blk_plug plug; /* * First job: lock down the current transaction and wait for * all outstanding updates to complete. */ -#ifdef COMMIT_STATS - spin_lock(&journal->j_list_lock); - summarise_journal_usage(journal); - spin_unlock(&journal->j_list_lock); -#endif - /* Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); @@ -363,13 +358,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) write_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; - /* - * Use plugged writes here, since we want to submit several before - * we unplug the device. We don't do explicit unplugging in here, - * instead we rely on sync_buffer() doing the unplug for us. - */ - if (commit_transaction->t_synchronous_commit) - write_op = WRITE_SYNC_PLUG; trace_jbd2_commit_locking(journal, commit_transaction); stats.run.rs_wait = commit_transaction->t_max_wait; stats.run.rs_locked = jiffies; @@ -410,7 +398,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * we do not require it to remember exactly which old buffers it * has reserved. This is consistent with the existing behaviour * that multiple jbd2_journal_get_write_access() calls to the same - * buffer are perfectly permissable. + * buffer are perfectly permissible. */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; @@ -469,8 +457,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (err) jbd2_journal_abort(journal, err); + blk_start_plug(&plug); jbd2_journal_write_revoke_records(journal, commit_transaction, - write_op); + WRITE_SYNC); + blk_finish_plug(&plug); jbd_debug(3, "JBD: commit phase 2\n"); @@ -497,6 +487,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = 0; descriptor = NULL; bufs = 0; + blk_start_plug(&plug); while (commit_transaction->t_buffers) { /* Find the next buffer to be journaled... */ @@ -658,7 +649,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(write_op, bh); + submit_bh(WRITE_SYNC, bh); } cond_resched(); stats.run.rs_blocks_logged += bufs; @@ -680,12 +671,16 @@ start_journal_io: err = 0; } + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_state = T_COMMIT_DFLUSH; + write_unlock(&journal->j_state_lock); /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue * the commit record */ - if (commit_transaction->t_flushed_data_blocks && + if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); @@ -699,6 +694,8 @@ start_journal_io: __jbd2_journal_abort_hard(journal); } + blk_finish_plug(&plug); + /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the @@ -760,8 +757,13 @@ wait_for_iobuf: required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* Wake up any transactions which were waiting for this - IO to complete */ + /* + * Wake up any transactions which were waiting for this IO to + * complete. The barrier must be here so that changes by + * jbd2_journal_file_buffer() take effect before wake_up_bit() + * does the waitqueue check. + */ + smp_mb(); wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); @@ -800,6 +802,10 @@ wait_for_iobuf: jbd2_journal_abort(journal, err); jbd_debug(3, "JBD: commit phase 5\n"); + write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); + commit_transaction->t_state = T_COMMIT_JFLUSH; + write_unlock(&journal->j_state_lock); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { @@ -808,7 +814,7 @@ wait_for_iobuf: if (err) __jbd2_journal_abort_hard(journal); } - if (!err && !is_journal_aborted(journal)) + if (cbh) err = journal_wait_on_commit_record(journal, cbh); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && @@ -955,7 +961,7 @@ restart_loop: jbd_debug(3, "JBD: commit phase 7\n"); - J_ASSERT(commit_transaction->t_state == T_COMMIT); + J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); commit_transaction->t_start = jiffies; stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9e4686900f18..9a7826990304 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -473,14 +473,18 @@ int __jbd2_log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction commit was started. + * Called with j_state_lock locked for writing. + * Returns true if a transaction commit was started. */ int __jbd2_log_start_commit(journal_t *journal, tid_t target) { /* - * Are we already doing a recent enough commit? + * The only transaction we can possibly wait upon is the + * currently running transaction (if it exists). Otherwise, + * the target tid must be an old one. */ - if (!tid_geq(journal->j_commit_request, target)) { + if (journal->j_running_transaction && + journal->j_running_transaction->t_tid == target) { /* * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. @@ -492,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) journal->j_commit_sequence); wake_up(&journal->j_wait_commit); return 1; - } + } else if (!tid_geq(journal->j_commit_request, target)) + /* This should never happen, but if it does, preserve + the evidence before kjournald goes into a loop and + increments j_commit_sequence beyond all recognition. */ + WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", + journal->j_commit_request, + journal->j_commit_sequence, + target, journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0); return 0; } @@ -520,11 +532,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; + int need_to_start = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); + if (!tid_geq(journal->j_commit_request, transaction->t_tid)) + need_to_start = 1; } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -535,6 +549,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) tid = transaction->t_tid; read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); return 1; } @@ -572,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) } /* + * Return 1 if a given transaction has not yet sent barrier request + * connected with a transaction commit. If 0 is returned, transaction + * may or may not have sent the barrier. Used to avoid sending barrier + * twice in common cases. + */ +int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) +{ + int ret = 0; + transaction_t *commit_trans; + + if (!(journal->j_flags & JBD2_BARRIER)) + return 0; + read_lock(&journal->j_state_lock); + /* Transaction already committed? */ + if (tid_geq(journal->j_commit_sequence, tid)) + goto out; + commit_trans = journal->j_committing_transaction; + if (!commit_trans || commit_trans->t_tid != tid) { + ret = 1; + goto out; + } + /* + * Transaction is being committed and we already proceeded to + * submitting a flush to fs partition? + */ + if (journal->j_fs_dev != journal->j_dev) { + if (!commit_trans->t_need_data_flush || + commit_trans->t_state >= T_COMMIT_DFLUSH) + goto out; + } else { + if (commit_trans->t_state >= T_COMMIT_JFLUSH) + goto out; + } + ret = 1; +out: + read_unlock(&journal->j_state_lock); + return ret; +} +EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); + +/* * Wait for a specified commit to complete. * The caller may not hold the journal lock. */ @@ -912,7 +969,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } @@ -978,7 +1035,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } @@ -986,7 +1043,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) err = jbd2_journal_bmap(journal, 0, &blocknr); /* If that failed, give up */ if (err) { - printk(KERN_ERR "%s: Cannnot locate journal superblock\n", + printk(KERN_ERR "%s: Cannot locate journal superblock\n", __func__); goto out_err; } @@ -2408,10 +2465,12 @@ const char *jbd2_dev_to_name(dev_t device) new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); if (!new_dev) return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + bd = bdget(device); spin_lock(&devname_cache_lock); if (devcache[i]) { if (devcache[i]->device == device) { kfree(new_dev); + bdput(bd); ret = devcache[i]->devname; spin_unlock(&devname_cache_lock); return ret; @@ -2420,7 +2479,6 @@ const char *jbd2_dev_to_name(dev_t device) } devcache[i] = new_dev; devcache[i]->device = device; - bd = bdget(device); if (bd) { bdevname(bd, devcache[i]->devname); bdput(bd); diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 9ad321fd63fd..69fd93588118 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -71,7 +71,7 @@ * switching hash tables under them. For operations on the lists of entries in * the hash table j_revoke_lock is used. * - * Finally, also replay code uses the hash tables but at this moment noone else + * Finally, also replay code uses the hash tables but at this moment no one else * can touch them (filesystem isn't mounted yet) and hence no locking is * needed. */ diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index faad2bd787c7..3eec82d32fd4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) */ /* - * Update transiaction's maximum wait time, if debugging is enabled. + * Update transaction's maximum wait time, if debugging is enabled. * * In order for t_max_wait to be reliable, it must be protected by a * lock. But doing so will mean that start_this_handle() can not be @@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) * means that maximum wait time reported by the jbd2_run_stats * tracepoint will always be zero. */ -static inline void update_t_max_wait(transaction_t *transaction) +static inline void update_t_max_wait(transaction_t *transaction, + unsigned long ts) { #ifdef CONFIG_JBD2_DEBUG - unsigned long ts = jiffies; - if (jbd2_journal_enable_debug && time_after(transaction->t_start, ts)) { ts = jbd2_time_diff(ts, transaction->t_start); @@ -117,10 +116,11 @@ static inline void update_t_max_wait(transaction_t *transaction) static int start_this_handle(journal_t *journal, handle_t *handle, int gfp_mask) { - transaction_t *transaction; - int needed; - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; + transaction_t *transaction, *new_transaction = NULL; + tid_t tid; + int needed, need_to_start; + int nblocks = handle->h_buffer_credits; + unsigned long ts = jiffies; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -222,8 +222,11 @@ repeat: atomic_sub(nblocks, &transaction->t_outstanding_credits); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -268,7 +271,7 @@ repeat: /* OK, account for the buffers that this operation expects to * use and add the handle to the running transaction. */ - update_t_max_wait(transaction); + update_t_max_wait(transaction, ts); handle->h_transaction = transaction; atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_handle_count); @@ -313,7 +316,8 @@ static handle_t *new_handle(int nblocks) * This function is visible to journal users (like ext3fs), so is not * called with the journal already locked. * - * Return a pointer to a newly allocated handle, or NULL on failure + * Return a pointer to a newly allocated handle, or an ERR_PTR() value + * on failure. */ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) { @@ -442,7 +446,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - int ret; + tid_t tid; + int need_to_start, ret; /* If we've had an abort of any type, don't even think about * actually doing the restart! */ @@ -465,8 +470,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; @@ -914,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) */ JBUFFER_TRACE(jh, "cancelling revoke"); jbd2_journal_cancel_revoke(handle, jh); - jbd2_journal_put_journal_head(jh); out: + jbd2_journal_put_journal_head(jh); return err; } @@ -1396,7 +1404,7 @@ int jbd2_journal_stop(handle_t *handle) /* * Once we drop t_updates, if it goes to zero the transaction - * could start commiting on us and eventually disappear. So + * could start committing on us and eventually disappear. So * once we do this, we must not dereference transaction * pointer again. */ @@ -2140,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) jinode->i_next_transaction == transaction) goto done; + /* + * We only ever set this variable to 1 so the test is safe. Since + * t_need_data_flush is likely to be set, we do the test to save some + * cacheline bouncing + */ + if (!transaction->t_need_data_flush) + transaction->t_need_data_flush = 1; /* On some different transaction's list - should be * the committing one */ if (jinode->i_transaction) { |
