Commit 7a3daded authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've added new features such as zone capacity for ZNS
  and a new GC policy, ATGC, along with in-memory segment management. In
  addition, we could improve the decompression speed significantly by
  changing virtual mapping method. Even though we've fixed lots of small
  bugs in compression support, I feel that it becomes more stable so
  that I could give it a try in production.

  Enhancements:
   - support zone capacity in NVMe Zoned Namespace devices
   - introduce in-memory current segment management
   - add standard casefolding support
   - support age threshold based garbage collection
   - improve decompression speed by changing virtual mapping method

  Bug fixes:
   - fix condition checks in some ioctl() such as compression, move_range, etc
   - fix 32/64bits support in data structures
   - fix memory allocation in zstd decompress
   - add some boundary checks to avoid kernel panic on corrupted image
   - fix disallowing compression for non-empty file
   - fix slab leakage of compressed block writes

  In addition, it includes code refactoring for better readability and
  minor bug fixes for compression and zoned device support"

* tag 'f2fs-for-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (51 commits)
  f2fs: code cleanup by removing unnecessary check
  f2fs: wait for sysfs kobject removal before freeing f2fs_sb_info
  f2fs: fix writecount false positive in releasing compress blocks
  f2fs: introduce check_swap_activate_fast()
  f2fs: don't issue flush in f2fs_flush_device_cache() for nobarrier case
  f2fs: handle errors of f2fs_get_meta_page_nofail
  f2fs: fix to set SBI_NEED_FSCK flag for inconsistent inode
  f2fs: reject CASEFOLD inode flag without casefold feature
  f2fs: fix memory alignment to support 32bit
  f2fs: fix slab leak of rpages pointer
  f2fs: compress: fix to disallow enabling compress on non-empty file
  f2fs: compress: introduce cic/dic slab cache
  f2fs: compress: introduce page array slab cache
  f2fs: fix to do sanity check on segment/section count
  f2fs: fix to check segment boundary during SIT page readahead
  f2fs: fix uninit-value in f2fs_lookup
  f2fs: remove unneeded parameter in find_in_block()
  f2fs: fix wrong total_sections check and fsmeta check
  f2fs: remove duplicated code in sanity_check_area_boundary
  f2fs: remove unused check on version_bitmap
  ...
parents 54a4c789 788e96d1
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -22,7 +22,8 @@ Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
Description:	Controls the victim selection policy for garbage collection.
		Setting gc_idle = 0(default) will disable this option. Setting
		gc_idle = 1 will select the Cost Benefit approach & setting
		gc_idle = 2 will select the greedy approach.
		gc_idle = 2 will select the greedy approach & setting
		gc_idle = 3 will select the age-threshold based approach.

What:		/sys/fs/f2fs/<disk>/reclaim_segments
Date:		October 2013
+67 −15
Original line number Diff line number Diff line
@@ -127,14 +127,14 @@ active_logs=%u Support configuring the number of active logs. In the
			 current design, f2fs supports only 2, 4, and 6 logs.
			 Default number is 6.
disable_ext_identify	 Disable the extension list configured by mkfs, so f2fs
			 does not aware of cold files such as media files.
			 is not aware of cold files such as media files.
inline_xattr		 Enable the inline xattrs feature.
noinline_xattr		 Disable the inline xattrs feature.
inline_xattr_size=%u	 Support configuring inline xattr size, it depends on
			 flexible inline xattr feature.
inline_data		 Enable the inline data feature: New created small(<~3.4k)
inline_data		 Enable the inline data feature: Newly created small (<~3.4k)
			 files can be written into inode block.
inline_dentry		 Enable the inline dir feature: data in new created
inline_dentry		 Enable the inline dir feature: data in newly created
			 directory entries can be written into inode block. The
			 space of inode block which is used to store inline
			 dentries is limited to ~3.4k.
@@ -203,9 +203,9 @@ usrjquota=<file> Appoint specified file and type during mount, so that quota
grpjquota=<file>	 information can be properly updated during recovery flow,
prjjquota=<file>	 <quota file>: must be in root directory;
jqfmt=<quota type>	 <quota type>: [vfsold,vfsv0,vfsv1].
offusrjquota		 Turn off user journelled quota.
offgrpjquota		 Turn off group journelled quota.
offprjjquota		 Turn off project journelled quota.
offusrjquota		 Turn off user journalled quota.
offgrpjquota		 Turn off group journalled quota.
offprjjquota		 Turn off project journalled quota.
quota			 Enable plain user disk quota accounting.
noquota			 Disable all plain disk quota option.
whint_mode=%s		 Control which write hints are passed down to block
@@ -266,6 +266,8 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted
			 inline encryption hardware. The on-disk format is
			 unaffected. For more details, see
			 Documentation/block/inline-encryption.rst.
atgc			 Enable age-threshold garbage collection, it provides high
			 effectiveness and efficiency on background GC.
======================== ============================================================

Debugfs Entries
@@ -301,7 +303,7 @@ Usage

	# insmod f2fs.ko

3. Create a directory trying to mount::
3. Create a directory to use when mounting::

	# mkdir /mnt/f2fs

@@ -315,7 +317,7 @@ mkfs.f2fs
The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
which builds a basic on-disk layout.

The options consist of:
The quick options consist of:

===============    ===========================================================
``-l [label]``     Give a volume label, up to 512 unicode name.
@@ -337,6 +339,8 @@ The options consist of:
                   1 is set by default, which conducts discard.
===============    ===========================================================

Note: please refer to the manpage of mkfs.f2fs(8) to get full option list.

fsck.f2fs
---------
The fsck.f2fs is a tool to check the consistency of an f2fs-formatted
@@ -344,10 +348,12 @@ partition, which examines whether the filesystem metadata and user-made data
are cross-referenced correctly or not.
Note that, initial version of the tool does not fix any inconsistency.

The options consist of::
The quick options consist of::

  -d debug level [default:0]

Note: please refer to the manpage of fsck.f2fs(8) to get full option list.

dump.f2fs
---------
The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
@@ -371,6 +377,37 @@ Examples::
    # dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
    # dump.f2fs -a 0~-1 /dev/sdx (SSA dump)

Note: please refer to the manpage of dump.f2fs(8) to get full option list.

sload.f2fs
----------
The sload.f2fs gives a way to insert files and directories in the existing disk
image. This tool is useful when building f2fs images given compiled files.

Note: please refer to the manpage of sload.f2fs(8) to get full option list.

resize.f2fs
-----------
The resize.f2fs lets a user resize the f2fs-formatted disk image, while preserving
all the files and directories stored in the image.

Note: please refer to the manpage of resize.f2fs(8) to get full option list.

defrag.f2fs
-----------
The defrag.f2fs can be used to defragment scattered written data as well as
filesystem metadata across the disk. This can improve the write speed by giving
more free consecutive space.

Note: please refer to the manpage of defrag.f2fs(8) to get full option list.

f2fs_io
-------
The f2fs_io is a simple tool to issue various filesystem APIs as well as
f2fs-specific ones, which is very useful for QA tests.

Note: please refer to the manpage of f2fs_io(8) to get full option list.

Design
======

@@ -383,7 +420,7 @@ consists of a set of sections. By default, section and zone sizes are set to one
segment size identically, but users can easily modify the sizes by mkfs.

F2FS splits the entire volume into six areas, and all the areas except superblock
consists of multiple segments as described below::
consist of multiple segments as described below::

                                            align with the zone size <-|
                 |-> align with the segment size
@@ -486,7 +523,7 @@ one inode block (i.e., a file) covers::
			              `- direct node (1018)
	                                         `- data (1018)

Note that, all the node blocks are mapped by NAT which means the location of
Note that all the node blocks are mapped by NAT which means the location of
each node is translated by the NAT table. In the consideration of the wandering
tree problem, F2FS is able to cut off the propagation of node updates caused by
leaf data writes.
@@ -566,7 +603,7 @@ When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
dentry consisting of the file name and its inode number. If not found, F2FS
scans the next hash table in level #1. In this way, F2FS scans hash tables in
each levels incrementally from 1 to N. In each levels F2FS needs to scan only
each levels incrementally from 1 to N. In each level F2FS needs to scan only
one bucket determined by the following equation, which shows O(log(# of files))
complexity::

@@ -707,7 +744,7 @@ WRITE_LIFE_LONG " WRITE_LIFE_LONG
Fallocate(2) Policy
-------------------

The default policy follows the below posix rule.
The default policy follows the below POSIX rule.

Allocating disk space
    The default operation (i.e., mode is zero) of fallocate() allocates
@@ -720,7 +757,7 @@ Allocating disk space
    as a method of optimally implementing that function.

However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having
fallocate(fd, DEFAULT_MODE), it allocates on-disk block addresses having
zero or random data, which is useful to the below scenario where:

 1. create(fd)
@@ -739,7 +776,7 @@ Compression implementation
  cluster can be compressed or not.

- In cluster metadata layout, one special block address is used to indicate
  cluster is compressed one or normal one, for compressed cluster, following
  a cluster is a compressed one or normal one; for compressed cluster, following
  metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs
  stores data including compress header and compressed data.

@@ -772,3 +809,18 @@ Compress metadata layout::
	+-------------+-------------+----------+----------------------------+
	| data length | data chksum | reserved |      compressed data       |
	+-------------+-------------+----------+----------------------------+

NVMe Zoned Namespace devices
----------------------------

- ZNS defines a per-zone capacity which can be equal or less than the
  zone-size. Zone-capacity is the number of usable blocks in the zone.
  F2FS checks if zone-capacity is less than zone-size, if it is, then any
  segment which starts after the zone-capacity is marked as not-free in
  the free segment bitmap at initial mount time. These segments are marked
  as permanently used so they are not allocated for writes and
  consequently are not needed to be garbage collected. In case the
  zone-capacity is not aligned to default segment size(2MB), then a segment
  can start before the zone-capacity and span across zone-capacity boundary.
  Such spanning segments are also considered as usable segments. All blocks
  past the zone-capacity are considered unusable in these segments.
+3 −3
Original line number Diff line number Diff line
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
	return (void *)f2fs_acl;

fail:
	kvfree(f2fs_acl);
	kfree(f2fs_acl);
	return ERR_PTR(-EINVAL);
}

@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
		acl = NULL;
	else
		acl = ERR_PTR(retval);
	kvfree(value);
	kfree(value);

	return acl;
}
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,

	error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);

	kvfree(value);
	kfree(value);
	if (!error)
		set_cached_acl(inode, type, acl);

+14 −3
Original line number Diff line number Diff line
@@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
	return __get_meta_page(sbi, index, true);
}

struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	int count = 0;
@@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			if (unlikely(blkno >= TOTAL_SEGS(sbi)))
				goto out;
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
@@ -1047,8 +1049,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
	if (unlikely(f2fs_cp_error(sbi))) {
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return -EIO;
	}

	spin_lock(&sbi->inode_lock[type]);

@@ -1619,11 +1625,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

	f2fs_flush_sit_entries(sbi, cpc);

	/* save inmem log status */
	f2fs_save_inmem_curseg(sbi);

	err = do_checkpoint(sbi, cpc);
	if (err)
		f2fs_release_discard_addrs(sbi);
	else
		f2fs_clear_prefree_segments(sbi, cpc);

	f2fs_restore_inmem_curseg(sbi);
stop:
	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);
@@ -1654,7 +1665,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
	}

	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
			NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
}

+182 −60
Original line number Diff line number Diff line
@@ -17,6 +17,33 @@
#include "node.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *cic_entry_slab;
static struct kmem_cache *dic_entry_slab;

/*
 * Allocate a zeroed array of @nr page pointers for @inode's compression
 * paths.  Cluster-sized (or smaller) requests come from the per-sb slab
 * cache; oversized requests fall back to f2fs_kzalloc().
 */
static void *page_array_alloc(struct inode *inode, int nr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int bytes = sizeof(struct page *) * nr;

	if (unlikely(bytes > sbi->page_array_slab_size))
		return f2fs_kzalloc(sbi, bytes, GFP_NOFS);
	return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS);
}

/*
 * Free a page pointer array obtained from page_array_alloc().  The size
 * check must mirror the allocation side so the buffer is returned to the
 * same allocator (slab vs. kmalloc).  NULL @pages is a no-op.
 */
static void page_array_free(struct inode *inode, void *pages, int nr)
{
	struct f2fs_sb_info *sbi;
	unsigned int bytes;

	if (!pages)
		return;

	sbi = F2FS_I_SB(inode);
	bytes = sizeof(struct page *) * nr;

	if (bytes > sbi->page_array_slab_size)
		kfree(pages);
	else
		kmem_cache_free(sbi->page_array_slab, pages);
}

struct f2fs_compress_ops {
	int (*init_compress_ctx)(struct compress_ctx *cc);
	void (*destroy_compress_ctx)(struct compress_ctx *cc);
@@ -130,19 +157,16 @@ struct page *f2fs_compress_control_page(struct page *page)

int f2fs_init_compress_ctx(struct compress_ctx *cc)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);

	if (cc->nr_rpages)
	if (cc->rpages)
		return 0;

	cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
					cc->log_cluster_size, GFP_NOFS);
	cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
	return cc->rpages ? 0 : -ENOMEM;
}

void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
{
	kfree(cc->rpages);
	page_array_free(cc->inode, cc->rpages, cc->cluster_size);
	cc->rpages = NULL;
	cc->nr_rpages = 0;
	cc->nr_cpages = 0;
@@ -382,16 +406,17 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
	ZSTD_DStream *stream;
	void *workspace;
	unsigned int workspace_size;
	unsigned int max_window_size =
			MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);

	workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE);
	workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);

	workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
					workspace_size, GFP_NOFS);
	if (!workspace)
		return -ENOMEM;

	stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE,
					workspace, workspace_size);
	stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
	if (!stream) {
		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
@@ -554,13 +579,29 @@ static void f2fs_compress_free_page(struct page *page)
	mempool_free(page, compress_page_pool);
}

#define MAX_VMAP_RETRIES	3

/*
 * Map @count pages into a contiguous kernel virtual range via
 * vm_map_ram().  On failure, flush lazily-freed vmap areas with
 * vm_unmap_aliases() and retry up to MAX_VMAP_RETRIES times.
 * Returns the mapped address, or NULL if every attempt failed.
 */
static void *f2fs_vmap(struct page **pages, unsigned int count)
{
	int attempt;

	for (attempt = 0; attempt < MAX_VMAP_RETRIES; attempt++) {
		void *addr = vm_map_ram(pages, count, -1);

		if (addr)
			return addr;
		/* reclaim stale lazy vmap space before the next try */
		vm_unmap_aliases();
	}
	return NULL;
}

static int f2fs_compress_pages(struct compress_ctx *cc)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
	struct f2fs_inode_info *fi = F2FS_I(cc->inode);
	const struct f2fs_compress_ops *cops =
				f2fs_cops[fi->i_compress_algorithm];
	unsigned int max_len, nr_cpages;
	unsigned int max_len, new_nr_cpages;
	struct page **new_cpages;
	int i, ret;

	trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx,
@@ -575,8 +616,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
	max_len = COMPRESS_HEADER_SIZE + cc->clen;
	cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);

	cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
					cc->nr_cpages, GFP_NOFS);
	cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
	if (!cc->cpages) {
		ret = -ENOMEM;
		goto destroy_compress_ctx;
@@ -590,13 +630,13 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
		}
	}

	cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
	cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size);
	if (!cc->rbuf) {
		ret = -ENOMEM;
		goto out_free_cpages;
	}

	cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
	cc->cbuf = f2fs_vmap(cc->cpages, cc->nr_cpages);
	if (!cc->cbuf) {
		ret = -ENOMEM;
		goto out_vunmap_rbuf;
@@ -618,16 +658,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
	for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
		cc->cbuf->reserved[i] = cpu_to_le32(0);

	nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
	new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);

	/* Now we're going to cut unnecessary tail pages */
	new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
	if (!new_cpages) {
		ret = -ENOMEM;
		goto out_vunmap_cbuf;
	}

	/* zero out any unused part of the last page */
	memset(&cc->cbuf->cdata[cc->clen], 0,
	       (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
			(new_nr_cpages * PAGE_SIZE) -
			(cc->clen + COMPRESS_HEADER_SIZE));

	vunmap(cc->cbuf);
	vunmap(cc->rbuf);
	vm_unmap_ram(cc->cbuf, cc->nr_cpages);
	vm_unmap_ram(cc->rbuf, cc->cluster_size);

	for (i = nr_cpages; i < cc->nr_cpages; i++) {
	for (i = 0; i < cc->nr_cpages; i++) {
		if (i < new_nr_cpages) {
			new_cpages[i] = cc->cpages[i];
			continue;
		}
		f2fs_compress_free_page(cc->cpages[i]);
		cc->cpages[i] = NULL;
	}
@@ -635,22 +687,24 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
	if (cops->destroy_compress_ctx)
		cops->destroy_compress_ctx(cc);

	cc->nr_cpages = nr_cpages;
	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
	cc->cpages = new_cpages;
	cc->nr_cpages = new_nr_cpages;

	trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
							cc->clen, ret);
	return 0;

out_vunmap_cbuf:
	vunmap(cc->cbuf);
	vm_unmap_ram(cc->cbuf, cc->nr_cpages);
out_vunmap_rbuf:
	vunmap(cc->rbuf);
	vm_unmap_ram(cc->rbuf, cc->cluster_size);
out_free_cpages:
	for (i = 0; i < cc->nr_cpages; i++) {
		if (cc->cpages[i])
			f2fs_compress_free_page(cc->cpages[i]);
	}
	kfree(cc->cpages);
	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
	cc->cpages = NULL;
destroy_compress_ctx:
	if (cops->destroy_compress_ctx)
@@ -677,7 +731,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
	if (bio->bi_status || PageError(page))
		dic->failed = true;

	if (refcount_dec_not_one(&dic->ref))
	if (atomic_dec_return(&dic->pending_pages))
		return;

	trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
@@ -689,8 +743,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
		goto out_free_dic;
	}

	dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
					dic->cluster_size, GFP_NOFS);
	dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
	if (!dic->tpages) {
		ret = -ENOMEM;
		goto out_free_dic;
@@ -715,13 +768,13 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
			goto out_free_dic;
	}

	dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
	dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
	if (!dic->rbuf) {
		ret = -ENOMEM;
		goto destroy_decompress_ctx;
	}

	dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
	dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
	if (!dic->cbuf) {
		ret = -ENOMEM;
		goto out_vunmap_rbuf;
@@ -738,15 +791,15 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
	ret = cops->decompress_pages(dic);

out_vunmap_cbuf:
	vunmap(dic->cbuf);
	vm_unmap_ram(dic->cbuf, dic->nr_cpages);
out_vunmap_rbuf:
	vunmap(dic->rbuf);
	vm_unmap_ram(dic->rbuf, dic->cluster_size);
destroy_decompress_ctx:
	if (cops->destroy_decompress_ctx)
		cops->destroy_decompress_ctx(dic);
out_free_dic:
	if (verity)
		refcount_set(&dic->ref, dic->nr_cpages);
		atomic_set(&dic->pending_pages, dic->nr_cpages);
	if (!verity)
		f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
								ret, false);
@@ -1029,6 +1082,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,

{
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.rpages = fsdata,
@@ -1132,7 +1186,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
		 */
		down_read(&sbi->node_write);
	} else if (!f2fs_trylock_op(sbi)) {
		return -EAGAIN;
		goto out_free;
	}

	set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
@@ -1155,15 +1209,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,

	fio.version = ni.version;

	cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS);
	cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS);
	if (!cic)
		goto out_put_dnode;

	cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
	cic->inode = inode;
	refcount_set(&cic->ref, cc->nr_cpages);
	cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
			cc->log_cluster_size, GFP_NOFS);
	atomic_set(&cic->pending_pages, cc->nr_cpages);
	cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
	if (!cic->rpages)
		goto out_put_cic;

@@ -1257,11 +1310,13 @@ unlock_continue:
	spin_unlock(&fi->i_size_lock);

	f2fs_put_rpages(cc);
	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
	cc->cpages = NULL;
	f2fs_destroy_compress_ctx(cc);
	return 0;

out_destroy_crypt:
	kfree(cic->rpages);
	page_array_free(cc->inode, cic->rpages, cc->cluster_size);

	for (--i; i >= 0; i--)
		fscrypt_finalize_bounce_page(&cc->cpages[i]);
@@ -1271,7 +1326,7 @@ out_destroy_crypt:
		f2fs_put_page(cc->cpages[i], 1);
	}
out_put_cic:
	kfree(cic);
	kmem_cache_free(cic_entry_slab, cic);
out_put_dnode:
	f2fs_put_dnode(&dn);
out_unlock_op:
@@ -1279,6 +1334,9 @@ out_unlock_op:
		up_read(&sbi->node_write);
	else
		f2fs_unlock_op(sbi);
out_free:
	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
	cc->cpages = NULL;
	return -EAGAIN;
}

@@ -1296,7 +1354,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)

	dec_page_count(sbi, F2FS_WB_DATA);

	if (refcount_dec_not_one(&cic->ref))
	if (atomic_dec_return(&cic->pending_pages))
		return;

	for (i = 0; i < cic->nr_rpages; i++) {
@@ -1305,8 +1363,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
		end_page_writeback(cic->rpages[i]);
	}

	kfree(cic->rpages);
	kfree(cic);
	page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
	kmem_cache_free(cic_entry_slab, cic);
}

static int f2fs_write_raw_pages(struct compress_ctx *cc,
@@ -1388,9 +1446,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	struct f2fs_inode_info *fi = F2FS_I(cc->inode);
	const struct f2fs_compress_ops *cops =
			f2fs_cops[fi->i_compress_algorithm];
	int err;

	*submitted = 0;
@@ -1405,9 +1460,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,

		err = f2fs_write_compressed_pages(cc, submitted,
							wbc, io_type);
		cops->destroy_compress_ctx(cc);
		kfree(cc->cpages);
		cc->cpages = NULL;
		if (!err)
			return 0;
		f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN);
@@ -1424,25 +1476,23 @@ destroy_out:

struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
	struct decompress_io_ctx *dic;
	pgoff_t start_idx = start_idx_of_cluster(cc);
	int i;

	dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS);
	dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS);
	if (!dic)
		return ERR_PTR(-ENOMEM);

	dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
			cc->log_cluster_size, GFP_NOFS);
	dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
	if (!dic->rpages) {
		kfree(dic);
		kmem_cache_free(dic_entry_slab, dic);
		return ERR_PTR(-ENOMEM);
	}

	dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
	dic->inode = cc->inode;
	refcount_set(&dic->ref, cc->nr_cpages);
	atomic_set(&dic->pending_pages, cc->nr_cpages);
	dic->cluster_idx = cc->cluster_idx;
	dic->cluster_size = cc->cluster_size;
	dic->log_cluster_size = cc->log_cluster_size;
@@ -1453,8 +1503,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
		dic->rpages[i] = cc->rpages[i];
	dic->nr_rpages = cc->cluster_size;

	dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
					dic->nr_cpages, GFP_NOFS);
	dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
	if (!dic->cpages)
		goto out_free;

@@ -1489,7 +1538,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
				continue;
			f2fs_compress_free_page(dic->tpages[i]);
		}
		kfree(dic->tpages);
		page_array_free(dic->inode, dic->tpages, dic->cluster_size);
	}

	if (dic->cpages) {
@@ -1498,11 +1547,11 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
				continue;
			f2fs_compress_free_page(dic->cpages[i]);
		}
		kfree(dic->cpages);
		page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
	}

	kfree(dic->rpages);
	kfree(dic);
	page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
	kmem_cache_free(dic_entry_slab, dic);
}

void f2fs_decompress_end_io(struct page **rpages,
@@ -1530,3 +1579,76 @@ unlock:
		unlock_page(rpage);
	}
}

/*
 * Create the per-superblock slab cache used by page_array_alloc() for
 * page pointer arrays up to one full compress cluster in size.
 * Returns 0 on success or -ENOMEM on failure.
 */
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	char slab_name[32];

	/*
	 * Use snprintf: the fixed prefix is already 22 bytes, so a large
	 * major:minor pair could overflow the 32-byte buffer with sprintf.
	 */
	snprintf(slab_name, sizeof(slab_name),
			"f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));

	sbi->page_array_slab_size = sizeof(struct page *) <<
					F2FS_OPTION(sbi).compress_log_size;

	sbi->page_array_slab = f2fs_kmem_cache_create(slab_name,
					sbi->page_array_slab_size);
	if (!sbi->page_array_slab)
		return -ENOMEM;
	return 0;
}

/* Tear down the per-superblock page array slab created at mount time. */
void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
{
	kmem_cache_destroy(sbi->page_array_slab);
}

/* Create the global slab cache backing struct compress_io_ctx objects. */
static int __init f2fs_init_cic_cache(void)
{
	cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry",
					sizeof(struct compress_io_ctx));
	return cic_entry_slab ? 0 : -ENOMEM;
}

/* Destroy the global compress_io_ctx slab cache. */
static void f2fs_destroy_cic_cache(void)
{
	kmem_cache_destroy(cic_entry_slab);
}

/* Create the global slab cache backing struct decompress_io_ctx objects. */
static int __init f2fs_init_dic_cache(void)
{
	dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry",
					sizeof(struct decompress_io_ctx));
	return dic_entry_slab ? 0 : -ENOMEM;
}

/* Destroy the global decompress_io_ctx slab cache. */
static void f2fs_destroy_dic_cache(void)
{
	kmem_cache_destroy(dic_entry_slab);
}

/*
 * Create the global compression slab caches (cic then dic), unwinding
 * the cic cache if dic creation fails.  Returns 0 on success or the
 * negative errno from the failing helper.
 */
int __init f2fs_init_compress_cache(void)
{
	int err;

	err = f2fs_init_cic_cache();
	if (err)
		goto out;
	err = f2fs_init_dic_cache();
	if (err)
		goto free_cic;
	return 0;
free_cic:
	f2fs_destroy_cic_cache();
out:
	/* propagate the helper's error instead of hardcoding -ENOMEM */
	return err;
}

/* Destroy both global compression slabs in reverse order of creation. */
void f2fs_destroy_compress_cache(void)
{
	f2fs_destroy_dic_cache();
	f2fs_destroy_cic_cache();
}
Loading