Commit ff780b91 authored by Harshad Shirwadkar's avatar Harshad Shirwadkar Committed by Theodore Ts'o
Browse files

jbd2: add fast commit machinery



This functions adds necessary APIs needed in JBD2 layer for fast
commits.

Signed-off-by: default avatarHarshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20201015203802.3597742-5-harshadshirwadkar@gmail.com


Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 6866d7b3
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -8,11 +8,19 @@
 * Ext4 fast commits routines.
 */
#include "ext4_jbd2.h"
/*
 * Fast commit cleanup routine. This is called after every fast commit and
 * full commit. full is true if we are called after a full commit.
 */
static void ext4_fc_cleanup(journal_t *journal, int full)
{
}

void ext4_fc_init(struct super_block *sb, journal_t *journal)
{
	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
		return;
	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
	if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
		pr_warn("Error while enabling fast commits, turning off.");
		ext4_clear_feature_fast_commit(sb);
+44 −0
Original line number Diff line number Diff line
@@ -206,6 +206,30 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
	return generic_writepages(mapping, &wbc);
}

/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(struct jbd2_inode *jinode)
{

	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
		return 0;

	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
	return jbd2_journal_submit_inode_data_buffers(jinode);

}
EXPORT_SYMBOL(jbd2_submit_inode_data);

int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
		!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
		return 0;
	return filemap_fdatawait_range_keep_errors(
		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
		jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);

/*
 * Submit all the data buffers of inode associated with the transaction to
 * disk.
@@ -415,6 +439,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	write_lock(&journal->j_state_lock);
	journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
	while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_fc_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		write_unlock(&journal->j_state_lock);
		schedule();
		write_lock(&journal->j_state_lock);
		finish_wait(&journal->j_fc_wait, &wait);
	}
	write_unlock(&journal->j_state_lock);

	commit_transaction = journal->j_running_transaction;

	trace_jbd2_start_commit(journal, commit_transaction);
@@ -422,6 +460,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
			commit_transaction->t_tid);

	write_lock(&journal->j_state_lock);
	journal->j_fc_off = 0;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);
	commit_transaction->t_state = T_LOCKED;

@@ -1121,12 +1160,16 @@ restart_loop:

	if (journal->j_commit_callback)
		journal->j_commit_callback(journal, commit_transaction);
	if (journal->j_fc_cleanup_callback)
		journal->j_fc_cleanup_callback(journal, 1);

	trace_jbd2_end_commit(journal, commit_transaction);
	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
		  journal->j_commit_sequence, journal->j_tail_sequence);

	write_lock(&journal->j_state_lock);
	journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
	spin_lock(&journal->j_list_lock);
	commit_transaction->t_state = T_FINISHED;
	/* Check if the transaction can be dropped now that we are finished */
@@ -1138,6 +1181,7 @@ restart_loop:
	spin_unlock(&journal->j_list_lock);
	write_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_done_commit);
	wake_up(&journal->j_fc_wait);

	/*
	 * Calculate overall stats
+189 −1
Original line number Diff line number Diff line
@@ -159,7 +159,9 @@ static void commit_timeout(struct timer_list *t)
 *
 * 1) COMMIT:  Every so often we need to commit the current state of the
 *    filesystem to disk.  The journal thread is responsible for writing
 *    all of the metadata buffers to disk.
 *    all of the metadata buffers to disk. If a fast commit is ongoing
 *    journal thread waits until it's done and then continues from
 *    there on.
 *
 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
 *    of the data in that part of the log has been rewritten elsewhere on
@@ -716,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
	return err;
}

/*
 * Start a fast commit. If there's an ongoing fast or full commit wait for
 * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
 * if a fast commit is not needed, either because there's an already a commit
 * going on or this tid has already been committed. Returns -EINVAL if no jbd2
 * commit has yet been performed.
 */
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
{
	/*
	 * Fast commits only allowed if at least one full commit has
	 * been processed.
	 */
	if (!journal->j_stats.ts_tid)
		return -EINVAL;

	if (tid <= journal->j_commit_sequence)
		return -EALREADY;

	write_lock(&journal->j_state_lock);
	if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
	    (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_fc_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		write_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_fc_wait, &wait);
		return -EALREADY;
	}
	journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
	write_unlock(&journal->j_state_lock);

	return 0;
}
EXPORT_SYMBOL(jbd2_fc_begin_commit);

/*
 * Stop a fast commit. If fallback is set, this function starts commit of
 * TID tid before any other fast commit can start.
 */
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
	if (journal->j_fc_cleanup_callback)
		journal->j_fc_cleanup_callback(journal, 0);
	write_lock(&journal->j_state_lock);
	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
	if (fallback)
		journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
	write_unlock(&journal->j_state_lock);
	wake_up(&journal->j_fc_wait);
	if (fallback)
		return jbd2_complete_transaction(journal, tid);
	return 0;
}

int jbd2_fc_end_commit(journal_t *journal)
{
	return __jbd2_fc_end_commit(journal, 0, 0);
}
EXPORT_SYMBOL(jbd2_fc_end_commit);

int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
{
	return __jbd2_fc_end_commit(journal, tid, 1);
}
EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);

/* Return 1 when transaction with given tid has already committed. */
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
{
@@ -784,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
	return jbd2_journal_bmap(journal, blocknr, retp);
}

/* Map one fast commit buffer for use by the file system */
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
{
	unsigned long long pblock;
	unsigned long blocknr;
	int ret = 0;
	struct buffer_head *bh;
	int fc_off;

	*bh_out = NULL;
	write_lock(&journal->j_state_lock);

	if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
		fc_off = journal->j_fc_off;
		blocknr = journal->j_fc_first + fc_off;
		journal->j_fc_off++;
	} else {
		ret = -EINVAL;
	}
	write_unlock(&journal->j_state_lock);

	if (ret)
		return ret;

	ret = jbd2_journal_bmap(journal, blocknr, &pblock);
	if (ret)
		return ret;

	bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
	if (!bh)
		return -ENOMEM;

	lock_buffer(bh);

	clear_buffer_uptodate(bh);
	set_buffer_dirty(bh);
	unlock_buffer(bh);
	journal->j_fc_wbuf[fc_off] = bh;

	*bh_out = bh;

	return 0;
}
EXPORT_SYMBOL(jbd2_fc_get_buf);

/*
 * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
 * for completion.
 */
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
{
	struct buffer_head *bh;
	int i, j_fc_off;

	read_lock(&journal->j_state_lock);
	j_fc_off = journal->j_fc_off;
	read_unlock(&journal->j_state_lock);

	/*
	 * Wait in reverse order to minimize chances of us being woken up before
	 * all IOs have completed
	 */
	for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
		bh = journal->j_fc_wbuf[i];
		wait_on_buffer(bh);
		put_bh(bh);
		journal->j_fc_wbuf[i] = NULL;
		if (unlikely(!buffer_uptodate(bh)))
			return -EIO;
	}

	return 0;
}
EXPORT_SYMBOL(jbd2_fc_wait_bufs);

/*
 * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
 * for completion.
 */
int jbd2_fc_release_bufs(journal_t *journal)
{
	struct buffer_head *bh;
	int i, j_fc_off;

	read_lock(&journal->j_state_lock);
	j_fc_off = journal->j_fc_off;
	read_unlock(&journal->j_state_lock);

	/*
	 * Wait in reverse order to minimize chances of us being woken up before
	 * all IOs have completed
	 */
	for (i = j_fc_off - 1; i >= 0; i--) {
		bh = journal->j_fc_wbuf[i];
		if (!bh)
			break;
		put_bh(bh);
		journal->j_fc_wbuf[i] = NULL;
	}

	return 0;
}
EXPORT_SYMBOL(jbd2_fc_release_bufs);

/*
 * Conversion of logical to physical block numbers for the journal
 *
@@ -1142,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
	init_waitqueue_head(&journal->j_wait_commit);
	init_waitqueue_head(&journal->j_wait_updates);
	init_waitqueue_head(&journal->j_wait_reserved);
	init_waitqueue_head(&journal->j_fc_wait);
	mutex_init(&journal->j_abort_mutex);
	mutex_init(&journal->j_barrier);
	mutex_init(&journal->j_checkpoint_mutex);
@@ -1495,6 +1671,7 @@ out:
static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
{
	journal_superblock_t *sb = journal->j_superblock;
	bool had_fast_commit = false;

	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
	lock_buffer(journal->j_sb_buffer);
@@ -1508,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)

	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
	sb->s_start    = cpu_to_be32(0);
	if (jbd2_has_feature_fast_commit(journal)) {
		/*
		 * When journal is clean, no need to commit fast commit flag and
		 * make file system incompatible with older kernels.
		 */
		jbd2_clear_feature_fast_commit(journal);
		had_fast_commit = true;
	}

	jbd2_write_superblock(journal, write_op);

	if (had_fast_commit)
		jbd2_set_feature_fast_commit(journal);

	/* Log is no longer empty */
	write_lock(&journal->j_state_lock);
	journal->j_flags |= JBD2_FLUSHED;
+27 −0
Original line number Diff line number Diff line
@@ -861,6 +861,13 @@ struct journal_s
	 */
	wait_queue_head_t	j_wait_reserved;

	/**
	 * @j_fc_wait:
	 *
	 * Wait queue to wait for completion of async fast commits.
	 */
	wait_queue_head_t	j_fc_wait;

	/**
	 * @j_checkpoint_mutex:
	 *
@@ -1232,6 +1239,15 @@ struct journal_s
	 */
	struct lockdep_map	j_trans_commit_map;
#endif

	/**
	 * @j_fc_cleanup_callback:
	 *
	 * Clean-up after fast commit or full commit. JBD2 calls this function
	 * after every commit operation.
	 */
	void (*j_fc_cleanup_callback)(struct journal_s *journal, int);

};

#define jbd2_might_wait_for_commit(j) \
@@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
#define JBD2_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
						 * data write error in ordered
						 * mode */
#define JBD2_FAST_COMMIT_ONGOING	0x100	/* Fast commit is ongoing */
#define JBD2_FULL_COMMIT_ONGOING	0x200	/* Full commit is ongoing */

/*
 * Function declarations for the journaling transaction and buffer
@@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *);

/* Fast commit related APIs */
int jbd2_fc_init(journal_t *journal, int num_fc_blks);
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
int jbd2_fc_end_commit(journal_t *journal);
int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);

/*
 * is_journal_abort
 *