Commit 46417064 authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Theodore Ts'o
Browse files

jbd2: Make state lock a spinlock



Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep
on RT, e.g. spin_lock(), alloc/free(), are invoked inside the lock held
region because bit spinlocks disable preemption even on RT.

A first attempt was to replace state lock with a spinlock placed in struct
buffer_head and make the locking conditional on PREEMPT_RT and
DEBUG_BIT_SPINLOCKS.

Jan pointed out that there is a 4 byte hole in struct journal_head where a
regular spinlock fits in and he would not object to convert the state lock
to a spinlock unconditionally.

Aside of solving the RT problem, this also gains lockdep coverage for the
journal head state lock (bit-spinlocks are not covered by lockdep as it's
hard to fit a lockdep map into a single bit).

The trivial change would have been to convert the jbd_*lock_bh_state()
inlines, but that comes with the downside that these functions take a
buffer head pointer which needs to be converted to a journal head pointer
which adds another level of indirection.

As almost all functions which use this lock have a journal head pointer
readily available, it makes more sense to remove the lock helper inlines
and write out spin_*lock() at all call sites.

Fixup all locking comments as well.

Suggested-by: default avatarJan Kara <jack@suse.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Jan Kara <jack@suse.com>
Cc: linux-ext4@vger.kernel.org
Link: https://lore.kernel.org/r/20190809124233.13277-7-jack@suse.cz


Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 2e710ff0
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			jbd_lock_bh_state(bh);
			spin_lock(&jh->b_state_lock);
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			jbd_unlock_bh_state(bh);
			spin_unlock(&jh->b_state_lock);
		}
		jbd2_journal_refile_buffer(journal, jh);
	}
@@ -928,7 +928,7 @@ restart_loop:
		 * done with it.
		 */
		get_bh(bh);
		jbd_lock_bh_state(bh);
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);

		/*
@@ -1024,7 +1024,7 @@ restart_loop:
		}
		JBUFFER_TRACE(jh, "refile or unfile buffer");
		drop_ref = __jbd2_journal_refile_buffer(jh);
		jbd_unlock_bh_state(bh);
		spin_unlock(&jh->b_state_lock);
		if (drop_ref)
			jbd2_journal_put_journal_head(jh);
		if (try_to_free)
+6 −4
Original line number Diff line number Diff line
@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
	/* keep subsequent assertions sane */
	atomic_set(&new_bh->b_count, 1);

	jbd_lock_bh_state(bh_in);
	spin_lock(&jh_in->b_state_lock);
repeat:
	/*
	 * If a new transaction has already done a buffer copy-out, then
@@ -405,13 +405,13 @@ repeat:
	if (need_copy_out && !done_copy_out) {
		char *tmp;

		jbd_unlock_bh_state(bh_in);
		spin_unlock(&jh_in->b_state_lock);
		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
		if (!tmp) {
			brelse(new_bh);
			return -ENOMEM;
		}
		jbd_lock_bh_state(bh_in);
		spin_lock(&jh_in->b_state_lock);
		if (jh_in->b_frozen_data) {
			jbd2_free(tmp, bh_in->b_size);
			goto repeat;
@@ -464,7 +464,7 @@ repeat:
	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	set_buffer_shadow(bh_in);
	jbd_unlock_bh_state(bh_in);
	spin_unlock(&jh_in->b_state_lock);

	return do_escape | (done_copy_out << 1);
}
@@ -2410,6 +2410,8 @@ static struct journal_head *journal_alloc_journal_head(void)
		ret = kmem_cache_zalloc(jbd2_journal_head_cache,
				GFP_NOFS | __GFP_NOFAIL);
	}
	if (ret)
		spin_lock_init(&ret->b_state_lock);
	return ret;
}

+47 −53
Original line number Diff line number Diff line
@@ -879,7 +879,7 @@ repeat:

 	start_lock = jiffies;
	lock_buffer(bh);
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);

	/* If it takes too long to lock the buffer, trace it */
	time_lock = jbd2_time_diff(start_lock, jiffies);
@@ -929,7 +929,7 @@ repeat:

	error = -EROFS;
	if (is_handle_aborted(handle)) {
		jbd_unlock_bh_state(bh);
		spin_unlock(&jh->b_state_lock);
		goto out;
	}
	error = 0;
@@ -993,7 +993,7 @@ repeat:
	 */
	if (buffer_shadow(bh)) {
		JBUFFER_TRACE(jh, "on shadow: sleep");
		jbd_unlock_bh_state(bh);
		spin_unlock(&jh->b_state_lock);
		wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
		goto repeat;
	}
@@ -1014,7 +1014,7 @@ repeat:
		JBUFFER_TRACE(jh, "generate frozen data");
		if (!frozen_buffer) {
			JBUFFER_TRACE(jh, "allocate memory for buffer");
			jbd_unlock_bh_state(bh);
			spin_unlock(&jh->b_state_lock);
			frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
						   GFP_NOFS | __GFP_NOFAIL);
			goto repeat;
@@ -1033,7 +1033,7 @@ attach_next:
	jh->b_next_transaction = transaction;

done:
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);

	/*
	 * If we are about to journal a buffer, then any revoke pending on it is
@@ -1172,7 +1172,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
	 * that case: the transaction must have deleted the buffer for it to be
	 * reused here.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);
	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
		jh->b_transaction == NULL ||
		(jh->b_transaction == journal->j_committing_transaction &&
@@ -1207,7 +1207,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
		jh->b_next_transaction = transaction;
		spin_unlock(&journal->j_list_lock);
	}
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);

	/*
	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
@@ -1275,13 +1275,13 @@ repeat:
		committed_data = jbd2_alloc(jh2bh(jh)->b_size,
					    GFP_NOFS|__GFP_NOFAIL);

	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);
	if (!jh->b_committed_data) {
		/* Copy out the current buffer contents into the
		 * preserved, committed copy. */
		JBUFFER_TRACE(jh, "generate b_committed data");
		if (!committed_data) {
			jbd_unlock_bh_state(bh);
			spin_unlock(&jh->b_state_lock);
			goto repeat;
		}

@@ -1289,7 +1289,7 @@ repeat:
		committed_data = NULL;
		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
	}
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);
out:
	jbd2_journal_put_journal_head(jh);
	if (unlikely(committed_data))
@@ -1390,16 +1390,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
	 */
	if (jh->b_transaction != transaction &&
	    jh->b_next_transaction != transaction) {
		jbd_lock_bh_state(bh);
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_next_transaction == transaction);
		jbd_unlock_bh_state(bh);
		spin_unlock(&jh->b_state_lock);
	}
	if (jh->b_modified == 1) {
		/* If it's in our transaction it must be in BJ_Metadata list. */
		if (jh->b_transaction == transaction &&
		    jh->b_jlist != BJ_Metadata) {
			jbd_lock_bh_state(bh);
			spin_lock(&jh->b_state_lock);
			if (jh->b_transaction == transaction &&
			    jh->b_jlist != BJ_Metadata)
				pr_err("JBD2: assertion failure: h_type=%u "
@@ -1409,13 +1409,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
				       jh->b_jlist);
			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
					jh->b_jlist == BJ_Metadata);
			jbd_unlock_bh_state(bh);
			spin_unlock(&jh->b_state_lock);
		}
		goto out;
	}

	journal = transaction->t_journal;
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);

	if (jh->b_modified == 0) {
		/*
@@ -1501,7 +1501,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);
out:
	JBUFFER_TRACE(jh, "exit");
	return ret;
@@ -1539,11 +1539,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)

	BUFFER_TRACE(bh, "entry");

	jbd_lock_bh_state(bh);
	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh) {
		__bforget(bh);
		return 0;
	}

	if (!buffer_jbd(bh))
		goto not_jbd;
	jh = bh2jh(bh);
	spin_lock(&jh->b_state_lock);

	/* Critical error: attempting to delete a bitmap buffer, maybe?
	 * Don't do any jbd operations, and return an error. */
@@ -1664,18 +1666,14 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
		spin_unlock(&journal->j_list_lock);
	}
drop:
	jbd_unlock_bh_state(bh);
	__brelse(bh);
	spin_unlock(&jh->b_state_lock);
	jbd2_journal_put_journal_head(jh);
	if (drop_reserve) {
		/* no need to reserve log space for this block -bzzz */
		handle->h_buffer_credits++;
	}
	return err;

not_jbd:
	jbd_unlock_bh_state(bh);
	__bforget(bh);
	goto drop;
}

/**
@@ -1874,7 +1872,7 @@ free_and_exit:
 *
 * j_list_lock is held.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 * jh->b_state_lock is held.
 */

static inline void
@@ -1898,7 +1896,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
 *
 * Called with j_list_lock held, and the journal may not be locked.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 * jh->b_state_lock is held.
 */

static inline void
@@ -1930,7 +1928,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	lockdep_assert_held(&jh->b_state_lock);
	transaction = jh->b_transaction;
	if (transaction)
		assert_spin_locked(&transaction->t_journal->j_list_lock);
@@ -1984,11 +1982,11 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)

	/* Get reference so that buffer cannot be freed before we unlock it */
	get_bh(bh);
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_unfile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);
	jbd2_journal_put_journal_head(jh);
	__brelse(bh);
}
@@ -1996,7 +1994,7 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
/*
 * Called from jbd2_journal_try_to_free_buffers().
 *
 * Called under jbd_lock_bh_state(bh)
 * Called under jh->b_state_lock
 */
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
@@ -2083,10 +2081,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
		if (!jh)
			continue;

		jbd_lock_bh_state(bh);
		spin_lock(&jh->b_state_lock);
		__journal_try_to_free_buffer(journal, bh);
		spin_unlock(&jh->b_state_lock);
		jbd2_journal_put_journal_head(jh);
		jbd_unlock_bh_state(bh);
		if (buffer_jbd(bh))
			goto busy;
	} while ((bh = bh->b_this_page) != head);
@@ -2107,7 +2105,7 @@ busy:
 *
 * Called under j_list_lock.
 *
 * Called under jbd_lock_bh_state(bh).
 * Called under jh->b_state_lock.
 */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
@@ -2201,7 +2199,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,

	/* OK, we have data buffer in journaled mode */
	write_lock(&journal->j_state_lock);
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);
	spin_lock(&journal->j_list_lock);

	/*
@@ -2282,10 +2280,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
		 * for commit and try again.
		 */
		if (partial_page) {
			jbd2_journal_put_journal_head(jh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			spin_unlock(&jh->b_state_lock);
			write_unlock(&journal->j_state_lock);
			jbd2_journal_put_journal_head(jh);
			return -EBUSY;
		}
		/*
@@ -2297,10 +2295,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
		set_buffer_freed(bh);
		if (journal->j_running_transaction && buffer_jbddirty(bh))
			jh->b_next_transaction = journal->j_running_transaction;
		jbd2_journal_put_journal_head(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		spin_unlock(&jh->b_state_lock);
		write_unlock(&journal->j_state_lock);
		jbd2_journal_put_journal_head(jh);
		return 0;
	} else {
		/* Good, the buffer belongs to the running transaction.
@@ -2324,10 +2322,10 @@ zap_buffer:
	 * here.
	 */
	jh->b_modified = 0;
	jbd2_journal_put_journal_head(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);
	write_unlock(&journal->j_state_lock);
	jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked:
	clear_buffer_dirty(bh);
	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2414,7 +2412,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	lockdep_assert_held(&jh->b_state_lock);
	assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
@@ -2476,11 +2474,11 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
void jbd2_journal_file_buffer(struct journal_head *jh,
				transaction_t *transaction, int jlist)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&jh->b_state_lock);
	spin_lock(&transaction->t_journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, transaction, jlist);
	spin_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
	spin_unlock(&jh->b_state_lock);
}

/*
@@ -2490,7 +2488,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
 * buffer on that transaction's metadata list.
 *
 * Called under j_list_lock
 * Called under jbd_lock_bh_state(jh2bh(jh))
 * Called under jh->b_state_lock
 *
 * When this function returns true, there's no next transaction to refile to
 * and the caller has to drop jh reference through
@@ -2501,7 +2499,7 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
	int was_dirty, jlist;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	lockdep_assert_held(&jh->b_state_lock);
	if (jh->b_transaction)
		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

@@ -2547,17 +2545,13 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
 */
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);
	bool drop;

	/* Get reference so that buffer cannot be freed before we unlock it */
	get_bh(bh);
	jbd_lock_bh_state(bh);
	spin_lock(&jh->b_state_lock);
	spin_lock(&journal->j_list_lock);
	drop = __jbd2_journal_refile_buffer(jh);
	jbd_unlock_bh_state(bh);
	spin_unlock(&jh->b_state_lock);
	spin_unlock(&journal->j_list_lock);
	__brelse(bh);
	if (drop)
		jbd2_journal_put_journal_head(jh);
}
+11 −8
Original line number Diff line number Diff line
@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
					 int nr)
{
	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
	struct journal_head *jh;
	int ret;

	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
	if (!buffer_jbd(bg_bh))
		return 1;

	jbd_lock_bh_state(bg_bh);
	bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
	jh = bh2jh(bg_bh);
	spin_lock(&jh->b_state_lock);
	bg = (struct ocfs2_group_desc *) jh->b_committed_data;
	if (bg)
		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
	else
		ret = 1;
	jbd_unlock_bh_state(bg_bh);
	spin_unlock(&jh->b_state_lock);

	return ret;
}
@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
	int status;
	unsigned int tmp;
	struct ocfs2_group_desc *undo_bg = NULL;
	struct journal_head *jh;

	/* The caller got this descriptor from
	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
		goto bail;
	}

	jh = bh2jh(group_bh);
	if (undo_fn) {
		jbd_lock_bh_state(group_bh);
		undo_bg = (struct ocfs2_group_desc *)
					bh2jh(group_bh)->b_committed_data;
		spin_lock(&jh->b_state_lock);
		undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
		BUG_ON(!undo_bg);
	}

@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
		if (undo_fn)
			jbd_unlock_bh_state(group_bh);
			spin_unlock(&jh->b_state_lock);
		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
				   le16_to_cpu(bg->bg_bits),
@@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
	}

	if (undo_fn)
		jbd_unlock_bh_state(group_bh);
		spin_unlock(&jh->b_state_lock);

	ocfs2_journal_dirty(handle, group_bh);
bail:
+2 −18
Original line number Diff line number Diff line
@@ -313,7 +313,6 @@ enum jbd_state_bits {
	BH_Revoked,		/* Has been revoked from the log */
	BH_RevokeValid,		/* Revoked flag is valid */
	BH_JBDDirty,		/* Is dirty but journaled */
	BH_State,		/* Pins most journal_head state */
	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
	BH_Shadow,		/* IO on shadow buffer is running */
	BH_Verified,		/* Metadata block has been verified ok */
@@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
	return bh->b_private;
}

static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
	bit_spin_lock(BH_State, &bh->b_state);
}

static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
	return bit_spin_is_locked(BH_State, &bh->b_state);
}

static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
	bit_spin_unlock(BH_State, &bh->b_state);
}

static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
	bit_spin_lock(BH_JournalHead, &bh->b_state);
@@ -551,9 +535,9 @@ struct transaction_chp_stats_s {
 *      ->jbd_lock_bh_journal_head()	(This is "innermost")
 *
 *    j_state_lock
 *    ->jbd_lock_bh_state()
 *    ->b_state_lock
 *
 *    jbd_lock_bh_state()
 *    b_state_lock
 *    ->j_list_lock
 *
 *    j_state_lock
Loading