Commit e88bd826 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull erofs updates from Gao Xiang:
 "This cycle we got rid of magical page->mapping type marks for
  temporary pages which had some concern before, now such usage is
  replaced with specific page->private.

  Also switch to inplace I/O instead of allocating extra cached pages to
  avoid direct reclaim under low memory scenario.

  There are some bmap bugfix and minor cleanups as well"

* tag 'erofs-for-5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: avoid using generic_block_bmap
  erofs: force inplace I/O under low memory scenario
  erofs: simplify try_to_claim_pcluster()
  erofs: insert to managed cache after adding to pcl
  erofs: get rid of magical Z_EROFS_MAPPING_STAGING
  erofs: remove a void EROFS_VERSION macro set in Makefile
parents 1a50ede2 d8b3df8b
Loading
Loading
Loading
Loading
+0 −5
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only

EROFS_VERSION = "1.0"

ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"

obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+41 −13
Original line number Diff line number Diff line
@@ -26,30 +26,58 @@ struct z_erofs_decompress_req {
	bool inplace_io, partial_decoding;
};

/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)

/*
 * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
 * used to mark temporary allocated pages from other
 * file/cached pages and NULL mapping pages.
 * For all pages in a pcluster, page->private should be one of
 * Type                         Last 2bits      page->private
 * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
 * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
 * cached/managed page          00              pointer to z_erofs_pcluster
 * online page (file-backed,    01/10/11        sub-index << 2 | count
 *              some pages can be used for inplace I/O)
 *
 * page->mapping should be one of
 * Type                 page->mapping
 * short-lived page     NULL
 * preallocated page    NULL
 * cached/managed page  non-NULL or NULL (invalidated/truncated page)
 * online page          non-NULL
 *
 * For all managed pages, PG_private should be set with 1 extra refcount,
 * which is used for page reclaim / migration.
 */
#define Z_EROFS_MAPPING_STAGING         ((void *)0x5A110C8D)

/* check if a page is marked as staging */
static inline bool z_erofs_page_is_staging(struct page *page)
/*
 * short-lived pages are pages directly from buddy system with specific
 * page->private (no need to set PagePrivate since these are non-LRU /
 * non-movable pages and bypass reclaim / migration code).
 */
static inline bool z_erofs_is_shortlived_page(struct page *page)
{
	return page->mapping == Z_EROFS_MAPPING_STAGING;
	if (page->private != Z_EROFS_SHORTLIVED_PAGE)
		return false;

	DBG_BUGON(page->mapping);
	return true;
}

static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
					      struct page *page)
{
	if (!z_erofs_page_is_staging(page))
	if (!z_erofs_is_shortlived_page(page))
		return false;

	/* staging pages should not be used by others at the same time */
	if (page_ref_count(page) > 1)
	/* short-lived pages should not be used by others at the same time */
	if (page_ref_count(page) > 1) {
		put_page(page);
	else
	} else {
		/* follow the pcluster rule above. */
		set_page_private(page, 0);
		list_add(&page->lru, pagepool);
	}
	return true;
}

+7 −19
Original line number Diff line number Diff line
@@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
		submit_bio(bio);
}

static int erofs_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh, int create)
{
	struct erofs_map_blocks map = {
		.m_la = iblock << 9,
	};
	int err;

	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (err)
		return err;

	if (map.m_flags & EROFS_MAP_MAPPED)
		bh->b_blocknr = erofs_blknr(map.m_pa);

	return err;
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	struct erofs_map_blocks map = {
		.m_la = blknr_to_addr(block),
	};

	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
@@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
			return 0;
	}

	return generic_block_bmap(mapping, block, erofs_get_block);
	if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW))
		return erofs_blknr(map.m_pa);

	return 0;
}

/* for uncompressed (aligned) files and raw access for other files */
+1 −1
Original line number Diff line number Diff line
@@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
			victim = erofs_allocpage(pagepool, GFP_KERNEL);
			if (!victim)
				return -ENOMEM;
			victim->mapping = Z_EROFS_MAPPING_STAGING;
			set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
		}
		rq->out[i] = victim;
	}
+99 −73
Original line number Diff line number Diff line
@@ -20,6 +20,11 @@
enum z_erofs_cache_alloctype {
	DONTALLOC,	/* don't allocate any cached pages */
	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
	/*
	 * try to use cached I/O if page allocation succeeds or fallback
	 * to in-place I/O instead to avoid any direct reclaim.
	 */
	TRYALLOC,
};

/*
@@ -154,13 +159,16 @@ static DEFINE_MUTEX(z_pagemap_global_lock);

static void preload_compressed_pages(struct z_erofs_collector *clt,
				     struct address_space *mc,
				     enum z_erofs_cache_alloctype type)
				     enum z_erofs_cache_alloctype type,
				     struct list_head *pagepool)
{
	const struct z_erofs_pcluster *pcl = clt->pcl;
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	struct page **pages = clt->compressedpages;
	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
	bool standalone = true;
	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
		return;
@@ -168,6 +176,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
		struct page *page;
		compressed_page_t t;
		struct page *newpage = NULL;

		/* the compressed page was loaded before */
		if (READ_ONCE(*pages))
@@ -179,7 +188,15 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
			t = tag_compressed_page_justfound(page);
		} else if (type == DELAYEDALLOC) {
			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
		} else if (type == TRYALLOC) {
			newpage = erofs_allocpage(pagepool, gfp);
			if (!newpage)
				goto dontalloc;

			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
			t = tag_compressed_page_justfound(newpage);
		} else {	/* DONTALLOC */
dontalloc:
			if (standalone)
				clt->compressedpages = pages;
			standalone = false;
@@ -189,8 +206,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
			continue;

		if (page)
		if (page) {
			put_page(page);
		} else if (newpage) {
			set_page_private(newpage, 0);
			list_add(&newpage->lru, pagepool);
		}
	}

	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -226,11 +247,8 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,

		/* barrier is implied in the following 'unlock_page' */
		WRITE_ONCE(pcl->compressed_pages[i], NULL);
		set_page_private(page, 0);
		ClearPagePrivate(page);

		detach_page_private(page);
		unlock_page(page);
		put_page(page);
	}
	return 0;
}
@@ -254,10 +272,8 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
		}
		erofs_workgroup_unfreeze(&pcl->obj, 1);

		if (ret) {
			ClearPagePrivate(page);
			put_page(page);
		}
		if (ret)
			detach_page_private(page);
	}
	return ret;
}
@@ -297,34 +313,33 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
	return ret ? 0 : -EAGAIN;
}

static enum z_erofs_collectmode
try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
		      z_erofs_next_pcluster_t *owned_head)
static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
{
	/* let's claim these following types of pclusters */
retry:
	if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
		/* type 1, nil pcluster */
		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
			    *owned_head) != Z_EROFS_PCLUSTER_NIL)
			goto retry;
	struct z_erofs_pcluster *pcl = clt->pcl;
	z_erofs_next_pcluster_t *owned_head = &clt->owned_head;

	/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
	if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
		    *owned_head) == Z_EROFS_PCLUSTER_NIL) {
		*owned_head = &pcl->next;
		/* lucky, I am the followee :) */
		return COLLECT_PRIMARY_FOLLOWED;
	} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
		/* so we can attach this pcluster to our submission chain. */
		clt->mode = COLLECT_PRIMARY_FOLLOWED;
		return;
	}

	/*
		 * type 2, link to the end of a existing open chain,
		 * be careful that its submission itself is governed
		 * by the original owned chain.
	 * type 2, link to the end of an existing open chain, be careful
	 * that its submission is controlled by the original attached chain.
	 */
	if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
			    *owned_head) != Z_EROFS_PCLUSTER_TAIL)
			goto retry;
		    *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
		*owned_head = Z_EROFS_PCLUSTER_TAIL;
		return COLLECT_PRIMARY_HOOKED;
		clt->mode = COLLECT_PRIMARY_HOOKED;
		clt->tailpcl = NULL;
		return;
	}
	return COLLECT_PRIMARY;	/* :( better luck next time */
	/* type 3, it belongs to a chain, but it isn't the end of the chain */
	clt->mode = COLLECT_PRIMARY;
}

static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
@@ -369,10 +384,8 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
	/* used to check tail merging loop due to corrupted images */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = pcl;
	clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = NULL;

	z_erofs_try_to_claim_pcluster(clt);
	clt->cl = cl;
	return 0;
}
@@ -562,7 +575,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
}

static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
				struct page *page)
				struct page *page, struct list_head *pagepool)
{
	struct inode *const inode = fe->inode;
	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -615,11 +628,12 @@ restart_now:

	/* preload all compressed pages (maybe downgrade role if necessary) */
	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
		cache_strategy = DELAYEDALLOC;
		cache_strategy = TRYALLOC;
	else
		cache_strategy = DONTALLOC;

	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
				 cache_strategy, pagepool);

hitted:
	/*
@@ -648,12 +662,12 @@ hitted:

retry:
	err = z_erofs_attach_page(clt, page, page_type);
	/* should allocate an additional staging page for pagevec */
	/* should allocate an additional short-lived page for pagevec */
	if (err == -EAGAIN) {
		struct page *const newpage =
				alloc_page(GFP_NOFS | __GFP_NOFAIL);

		newpage->mapping = Z_EROFS_MAPPING_STAGING;
		set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
		err = z_erofs_attach_page(clt, newpage,
					  Z_EROFS_PAGE_TYPE_EXCLUSIVE);
		if (!err)
@@ -710,6 +724,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
		queue_work(z_erofs_workqueue, &io->u.work);
}

static bool z_erofs_page_is_invalidated(struct page *page)
{
	return !page->mapping && !z_erofs_is_shortlived_page(page);
}

static void z_erofs_decompressqueue_endio(struct bio *bio)
{
	tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
@@ -722,7 +741,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
		struct page *page = bvec->bv_page;

		DBG_BUGON(PageUptodate(page));
		DBG_BUGON(!page->mapping);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (err)
			SetPageError(page);
@@ -795,9 +814,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

		/* all pages in pagevec ought to be valid */
		DBG_BUGON(!page);
		DBG_BUGON(!page->mapping);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (z_erofs_put_stagingpage(pagepool, page))
		if (z_erofs_put_shortlivedpage(pagepool, page))
			continue;

		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
@@ -831,9 +850,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

		/* all compressed pages ought to be valid */
		DBG_BUGON(!page);
		DBG_BUGON(!page->mapping);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (!z_erofs_page_is_staging(page)) {
		if (!z_erofs_is_shortlived_page(page)) {
			if (erofs_page_is_managed(sbi, page)) {
				if (!PageUptodate(page))
					err = -EIO;
@@ -858,7 +877,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
			overlapped = true;
		}

		/* PG_error needs checking for inplaced and staging pages */
		/* PG_error needs checking for all non-managed pages */
		if (PageError(page)) {
			DBG_BUGON(PageUptodate(page));
			err = -EIO;
@@ -897,8 +916,8 @@ out:
		if (erofs_page_is_managed(sbi, page))
			continue;

		/* recycle all individual staging pages */
		(void)z_erofs_put_stagingpage(pagepool, page);
		/* recycle all individual short-lived pages */
		(void)z_erofs_put_shortlivedpage(pagepool, page);

		WRITE_ONCE(compressed_pages[i], NULL);
	}
@@ -908,10 +927,10 @@ out:
		if (!page)
			continue;

		DBG_BUGON(!page->mapping);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		/* recycle all individual staging pages */
		if (z_erofs_put_stagingpage(pagepool, page))
		/* recycle all individual short-lived pages */
		if (z_erofs_put_shortlivedpage(pagepool, page))
			continue;

		if (err < 0)
@@ -1008,16 +1027,30 @@ repeat:
	justfound = tagptr_unfold_tags(t);
	page = tagptr_unfold_ptr(t);

	/*
	 * preallocated cached pages, which is used to avoid direct reclaim
	 * otherwise, it will go inplace I/O path instead.
	 */
	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
		WRITE_ONCE(pcl->compressed_pages[nr], page);
		set_page_private(page, 0);
		tocache = true;
		goto out_tocache;
	}
	mapping = READ_ONCE(page->mapping);

	/*
	 * unmanaged (file) pages are all locked solidly,
	 * file-backed online pages in plcuster are all locked steady,
	 * therefore it is impossible for `mapping' to be NULL.
	 */
	if (mapping && mapping != mc)
		/* ought to be unmanaged pages */
		goto out;

	/* directly return for shortlived page as well */
	if (z_erofs_is_shortlived_page(page))
		goto out;

	lock_page(page);

	/* only true if page reclaim goes wrong, should never happen */
@@ -1061,28 +1094,21 @@ repeat:
	put_page(page);
out_allocpage:
	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
		/* non-LRU / non-movable temporary page is needed */
		page->mapping = Z_EROFS_MAPPING_STAGING;
		tocache = false;
	}

	if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
		if (tocache) {
			/* since it added to managed cache successfully */
			unlock_page(page);
			put_page(page);
		} else {
		list_add(&page->lru, pagepool);
		}
		cond_resched();
		goto repeat;
	}

	if (tocache) {
		set_page_private(page, (unsigned long)pcl);
		SetPagePrivate(page);
out_tocache:
	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
		/* turn into temporary page if fails (1 ref) */
		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
		goto out;
	}
	attach_page_private(page, pcl);
	/* drop a refcount added by allocpage (then we have 2 refs here) */
	put_page(page);

out:	/* the only exit (for tracing and debugging) */
	return page;
}
@@ -1284,7 +1310,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)

	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;

	err = z_erofs_do_read_page(&f, page);
	err = z_erofs_do_read_page(&f, page, &pagepool);
	(void)z_erofs_collector_end(&f.clt);

	/* if some compressed cluster ready, need submit them anyway */
@@ -1338,7 +1364,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
		/* traversal in reverse order */
		head = (void *)page_private(page);

		err = z_erofs_do_read_page(&f, page);
		err = z_erofs_do_read_page(&f, page, &pagepool);
		if (err)
			erofs_err(inode->i_sb,
				  "readahead error at page %lu @ nid %llu",
Loading