Commit d776aaa9 authored by Henry Burns, committed by Linus Torvalds

mm/z3fold.c: fix race between migration and destruction

In z3fold_destroy_pool() we call destroy_workqueue(pool->compact_wq).
However, we have no guarantee that migration isn't happening in the
background at that time.

Migration directly calls queue_work_on() against pool->compact_wq; if
destruction wins that race, work is queued on an already-destroyed
workqueue.
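
The fix below therefore makes destruction set pool->destroying under
pool->lock and then wait for the count of isolated pages to drain to
zero before tearing down the workqueues. As a rough userspace sketch of
that flag-plus-counter drain pattern (not kernel code: a pthread mutex
and condition variable stand in for pool->lock and the new wait queue,
and all names are illustrative):

/* build: cc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool {
	pthread_mutex_t lock;
	pthread_cond_t  drained;
	bool destroying;
	int  isolated;
};

/* Analogue of z3fold_page_isolate(): refuse new isolations once
 * destruction has begun; otherwise count the page as in flight. */
static bool pool_isolate(struct pool *p)
{
	bool ok;

	pthread_mutex_lock(&p->lock);
	ok = !p->destroying;
	if (ok)
		p->isolated++;
	pthread_mutex_unlock(&p->lock);
	return ok;
}

/* Analogue of z3fold_dec_isolated(): drop the count and wake the
 * destroyer once the last in-flight page is done. */
static void pool_unisolate(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->isolated == 0)
		pthread_cond_broadcast(&p->drained);
	pthread_mutex_unlock(&p->lock);
}

/* Analogue of z3fold_destroy_pool(): raise the flag under the lock,
 * then sleep until every in-flight isolation has drained. */
static void pool_destroy(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	p->destroying = true;
	while (p->isolated > 0)
		pthread_cond_wait(&p->drained, &p->lock);
	pthread_mutex_unlock(&p->lock);
	/* only now is it safe to destroy the workqueues */
}

int main(void)
{
	struct pool p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.drained = PTHREAD_COND_INITIALIZER,
	};

	if (pool_isolate(&p))
		pool_unisolate(&p);	/* migration finished */
	pool_destroy(&p);
	printf("drained, safe to destroy\n");
	return 0;
}

In the patch itself the condition variable's role is played by the new
pool->isolate_wait wait_queue_head: z3fold_destroy_pool() sleeps in
wait_event() and z3fold_dec_isolated() calls wake_up_all() when the
last isolated page is released.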

Link: http://lkml.kernel.org/r/20190809213828.202833-1-henryburns@google.com
Signed-off-by: Henry Burns <henryburns@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Jonathan Adams <jwadams@google.com>
Cc: Henry Burns <henrywolfeburns@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 36146921
 mm/z3fold.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/mm/z3fold.c b/mm/z3fold.c
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,6 +41,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 
@@ -145,6 +146,8 @@ struct z3fold_header {
  * @release_wq:	workqueue for safe page release
  * @work:	work_struct for safe page release
  * @inode:	inode for z3fold pseudo filesystem
+ * @destroying: bool to stop migration once we start destruction
+ * @isolated: int to count the number of pages currently in isolation
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -163,8 +166,11 @@ struct z3fold_pool {
 	const struct zpool_ops *zpool_ops;
 	struct workqueue_struct *compact_wq;
 	struct workqueue_struct *release_wq;
+	struct wait_queue_head isolate_wait;
 	struct work_struct work;
 	struct inode *inode;
+	bool destroying;
+	int isolated;
 };
 
 /*
@@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 		goto out_c;
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
+	init_waitqueue_head(&pool->isolate_wait);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 	if (!pool->unbuddied)
 		goto out_pool;
@@ -808,6 +815,15 @@ out:
 	return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+	bool ret;
+
+	spin_lock(&pool->lock);
+	ret = pool->isolated == 0;
+	spin_unlock(&pool->lock);
+	return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool:	the z3fold pool to be destroyed
@@ -817,6 +833,22 @@ out:
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
+	/*
+	 * We set pool->destroying under lock to ensure that
+	 * z3fold_page_isolate() sees any changes to destroying. This way we
+	 * avoid the need for any memory barriers.
+	 */
+
+	spin_lock(&pool->lock);
+	pool->destroying = true;
+	spin_unlock(&pool->lock);
+
+	/*
+	 * We need to ensure that no pages are being migrated while we destroy
+	 * these workqueues, as migration can queue work on either of the
+	 * workqueues.
+	 */
+	wait_event(pool->isolate_wait, pool_isolated_are_drained(pool));
 
 	/*
 	 * We need to destroy pool->compact_wq before pool->release_wq,
@@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 	return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+	assert_spin_locked(&pool->lock);
+	VM_BUG_ON(pool->isolated <= 0);
+	pool->isolated--;
+
+	/*
+	 * If we have no more isolated pages, we have to see if
+	 * z3fold_destroy_pool() is waiting for a signal.
+	 */
+	if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
+		wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+	pool->isolated++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
 	struct z3fold_header *zhdr;
@@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 		spin_lock(&pool->lock);
 		if (!list_empty(&page->lru))
 			list_del(&page->lru);
+		/*
+		 * We need to check for destruction while holding pool->lock, as
+		 * otherwise destruction could see 0 isolated pages, and
+		 * proceed.
+		 */
+		if (unlikely(pool->destroying)) {
+			spin_unlock(&pool->lock);
+			/*
+			 * If this page isn't stale, somebody else holds a
+			 * reference to it. Let's drop our refcount so that they
+			 * can call the release logic.
+			 */
+			if (unlikely(kref_put(&zhdr->refcount,
+					      release_z3fold_page_locked))) {
+				/*
+				 * If we get here we have kref problems, so we
+				 * should freak out.
+				 */
+				WARN(1, "Z3fold is experiencing kref problems\n");
+				return false;
+			}
+			z3fold_page_unlock(zhdr);
+			return false;
+		}
+
+		z3fold_inc_isolated(pool);
 		spin_unlock(&pool->lock);
 		z3fold_page_unlock(zhdr);
 		return true;
@@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+	spin_lock(&pool->lock);
+	z3fold_dec_isolated(pool);
+	spin_unlock(&pool->lock);
+
 	page_mapcount_reset(page);
 	put_page(page);
 	return 0;
@@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 		atomic64_dec(&pool->pages_nr);
+		spin_lock(&pool->lock);
+		z3fold_dec_isolated(pool);
+		spin_unlock(&pool->lock);
 		return;
 	}
 	spin_lock(&pool->lock);
 	list_add(&page->lru, &pool->lru);
+	z3fold_dec_isolated(pool);
 	spin_unlock(&pool->lock);
 	z3fold_page_unlock(zhdr);
 }