Commit 74278da9 authored by Dave Chinner's avatar Dave Chinner Committed by Josef Bacik
Browse files

inode: convert inode_sb_list_lock to per-sb



The process of reducing contention on per-superblock inode lists
starts with moving the locking to match the per-superblock inode
list. This takes the global lock out of the picture and reduces the
contention problems to within a single filesystem. This doesn't get
rid of contention as the locks still have global CPU scope, but it
does isolate operations on different superblocks form each other.

Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Tested-by: default avatarDave Chinner <dchinner@redhat.com>
parent cbedaac6
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

@@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * s_inode_list_lock  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
@@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)

		func(I_BDEV(inode), arg);

		spin_lock(&inode_sb_list_lock);
		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}
+6 −4
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&sb->s_inode_list_lock);

		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		iput(toput_inode);
		toput_inode = inode;
		spin_lock(&inode_sb_list_lock);

		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);
	iput(toput_inode);
}

+6 −6
Original line number Diff line number Diff line
@@ -2124,7 +2124,7 @@ static void wait_sb_inodes(struct super_block *sb)
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
@@ -2144,14 +2144,14 @@ static void wait_sb_inodes(struct super_block *sb)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&sb->s_inode_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
@@ -2161,9 +2161,9 @@ static void wait_sb_inodes(struct super_block *sb)

		cond_resched();

		spin_lock(&inode_sb_list_lock);
		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);
	iput(old_inode);
}

+13 −15
Original line number Diff line number Diff line
@@ -28,8 +28,8 @@
 *   inode->i_state, inode->i_hash, __iget()
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * inode->i_sb->s_inode_list_lock protects:
 *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
 * inode_hash_lock protects:
@@ -37,7 +37,7 @@
 *
 * Lock ordering:
 *
 * inode_sb_list_lock
 * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
@@ -45,7 +45,7 @@
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode_sb_list_lock
 *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode_sb_list_lock);
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode_sb_list_lock);
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}

@@ -594,7 +592,7 @@ void evict_inodes(struct super_block *sb)
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;
@@ -610,7 +608,7 @@ void evict_inodes(struct super_block *sb)
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
@@ -631,7 +629,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -654,7 +652,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);

@@ -890,7 +888,7 @@ struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_sb_list_lock);
	spin_lock_prefetch(&sb->s_inode_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
+0 −1
Original line number Diff line number Diff line
@@ -112,7 +112,6 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
/*
 * inode.c
 */
extern spinlock_t inode_sb_list_lock;
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);

Loading