Commit b1e24395 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "This contains usual mix of new features, core changes and fixes; full
  list below. I'm planning a second pull request with a few more fixes
  that arrived recently but too close to the merge window; I will send it
  next week.

  New features:

   - support zstd compression levels

   - new ioctl to unregister a device from the module (ie. reverse of
     device scan)

   - scrub prints a message to log when it's about to start or finish

  Core changes:

   - qgroups can now skip part of a tree that does not get updated
     during relocation, because this does not affect the quota
     accounting, estimated speedup in run time is about 20%

   - the compression workspace management had to be enhanced due to zstd
     requirements

   - various ENOSPC fixes: when there's high fragmentation, the
     over-reservation can cause an ENOSPC that might not happen after a
     flush; in such cases, wait to see if the situation improves

  Fixes:

   - various ioctls could overwrite previous return value if
     copy_to_user fails, fix this so the original error is reported

   - more reclaim vs GFP_KERNEL fixes

   - other cleanups and refactoring

   - fix a (valid) lockdep warning in a test when device replace is
     destroying worker threads

   - make qgroup async transaction commit more aggressive; this avoids
     some 'quota limit reached' errors if there is not enough data to
     trigger a transaction commit in order to flush

   - fix deadlock between snapshot deletion and quotas when backref
     walking is called from context that already holds the same locks

   - fsync fixes:
      - fix fsync after succession of renames of different files
      - fix fsync after succession of renames and unlink/rmdir"

* tag 'for-5.1-part1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (92 commits)
  btrfs: Remove unnecessary casts in btrfs_read_root_item
  Btrfs: remove assertion when searching for a key in a node/leaf
  Btrfs: add missing error handling after doing leaf/node binary search
  btrfs: drop the lock on error in btrfs_dev_replace_cancel
  btrfs: ensure that a DUP or RAID1 block group has exactly two stripes
  btrfs: init csum_list before possible free
  Btrfs: remove no longer needed range length checks for deduplication
  Btrfs: fix fsync after succession of renames and unlink/rmdir
  Btrfs: fix fsync after succession of renames of different files
  btrfs: honor path->skip_locking in backref code
  btrfs: qgroup: Make qgroup async transaction commit more aggressive
  btrfs: qgroup: Move reserved data accounting from btrfs_delayed_ref_head to btrfs_qgroup_extent_record
  btrfs: scrub: remove unused nocow worker pointer
  btrfs: scrub: add assertions for worker pointers
  btrfs: scrub: convert scrub_workers_refcnt to refcount_t
  btrfs: scrub: add scrub_lock lockdep check in scrub_workers_get
  btrfs: scrub: fix circular locking dependency warning
  btrfs: fix comment its device list mutex not volume lock
  btrfs: extent_io: Kill the forward declaration of flush_write_bio
  btrfs: Fix grossly misleading argument names in extent io search
  ...
parents 0556161f f65e25e3
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/posix_acl_xattr.h>
#include <linux/posix_acl.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

#include "ctree.h"
@@ -72,8 +73,16 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
	}

	if (acl) {
		unsigned int nofs_flag;

		size = posix_acl_xattr_size(acl->a_count);
		/*
		 * We're holding a transaction handle, so use a NOFS memory
		 * allocation context to avoid deadlock if reclaim happens.
		 */
		nofs_flag = memalloc_nofs_save();
		value = kmalloc(size, GFP_KERNEL);
		memalloc_nofs_restore(nofs_flag);
		if (!value) {
			ret = -ENOMEM;
			goto out;
+4 −6
Original line number Diff line number Diff line
@@ -139,13 +139,11 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
	}

	if (flags & WQ_HIGHPRI)
		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
						 ret->current_active, "btrfs",
						 name);
		ret->normal_wq = alloc_workqueue("btrfs-%s-high", flags,
						 ret->current_active, name);
	else
		ret->normal_wq = alloc_workqueue("%s-%s", flags,
						 ret->current_active, "btrfs",
						 name);
		ret->normal_wq = alloc_workqueue("btrfs-%s", flags,
						 ret->current_active, name);
	if (!ret->normal_wq) {
		kfree(ret);
		return NULL;
+14 −8
Original line number Diff line number Diff line
@@ -712,7 +712,7 @@ out:
 * read tree blocks and add keys where required.
 */
static int add_missing_keys(struct btrfs_fs_info *fs_info,
			    struct preftrees *preftrees)
			    struct preftrees *preftrees, bool lock)
{
	struct prelim_ref *ref;
	struct extent_buffer *eb;
@@ -737,11 +737,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
			free_extent_buffer(eb);
			return -EIO;
		}
		if (lock)
			btrfs_tree_read_lock(eb);
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		if (lock)
			btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
		prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
@@ -1227,7 +1229,7 @@ again:

	btrfs_release_path(path);

	ret = add_missing_keys(fs_info, &preftrees);
	ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
	if (ret)
		goto out;

@@ -1288,10 +1290,14 @@ again:
					ret = -EIO;
					goto out;
				}

				if (!path->skip_locking) {
					btrfs_tree_read_lock(eb);
				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
					btrfs_set_lock_blocking_read(eb);
				}
				ret = find_extent_in_eb(eb, bytenr,
							*extent_item_pos, &eie, ignore_offset);
				if (!path->skip_locking)
					btrfs_tree_read_unlock_blocking(eb);
				free_extent_buffer(eb);
				if (ret < 0)
@@ -1650,7 +1656,7 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in) {
			if (!path->skip_locking)
				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				btrfs_set_lock_blocking_read(eb);
			path->nodes[0] = NULL;
			path->locks[0] = 0;
		}
+120 −133
Original line number Diff line number Diff line
@@ -730,6 +730,28 @@ struct heuristic_ws {
	struct list_head list;
};

/* Workspace manager instance used by the heuristic_*_workspace helpers. */
static struct workspace_manager heuristic_wsm;

/*
 * Initialize the heuristic workspace manager, binding it to the heuristic
 * ops table (btrfs_heuristic_compress).
 */
static void heuristic_init_workspace_manager(void)
{
	btrfs_init_workspace_manager(&heuristic_wsm, &btrfs_heuristic_compress);
}

/*
 * Tear down the heuristic workspace manager by handing it to the generic
 * cleanup helper.
 */
static void heuristic_cleanup_workspace_manager(void)
{
	btrfs_cleanup_workspace_manager(&heuristic_wsm);
}

/*
 * Obtain a workspace from the heuristic workspace manager.
 *
 * @level is forwarded unchanged to btrfs_get_workspace().
 */
static struct list_head *heuristic_get_workspace(unsigned int level)
{
	return btrfs_get_workspace(&heuristic_wsm, level);
}

/*
 * Hand workspace @ws back to the heuristic workspace manager via
 * btrfs_put_workspace().
 */
static void heuristic_put_workspace(struct list_head *ws)
{
	btrfs_put_workspace(&heuristic_wsm, ws);
}

static void free_heuristic_ws(struct list_head *ws)
{
	struct heuristic_ws *workspace;
@@ -742,7 +764,7 @@ static void free_heuristic_ws(struct list_head *ws)
	kfree(workspace);
}

static struct list_head *alloc_heuristic_ws(void)
static struct list_head *alloc_heuristic_ws(unsigned int level)
{
	struct heuristic_ws *ws;

@@ -769,66 +791,60 @@ fail:
	return ERR_PTR(-ENOMEM);
}

struct workspaces_list {
	struct list_head idle_ws;
	spinlock_t ws_lock;
	/* Number of free workspaces */
	int free_ws;
	/* Total number of allocated workspaces */
	atomic_t total_ws;
	/* Waiters for a free workspace */
	wait_queue_head_t ws_wait;
const struct btrfs_compress_op btrfs_heuristic_compress = {
	.init_workspace_manager = heuristic_init_workspace_manager,
	.cleanup_workspace_manager = heuristic_cleanup_workspace_manager,
	.get_workspace = heuristic_get_workspace,
	.put_workspace = heuristic_put_workspace,
	.alloc_workspace = alloc_heuristic_ws,
	.free_workspace = free_heuristic_ws,
};

static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];

static struct workspaces_list btrfs_heuristic_ws;

/*
 * Table of per-type operation structures. Callers in this file look up an
 * entry with the compression type as the array index.
 */
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
	/* The heuristic is represented as compression type 0 */
	&btrfs_heuristic_compress,
	&btrfs_zlib_compress,
	&btrfs_lzo_compress,
	&btrfs_zstd_compress,
};

void __init btrfs_init_compress(void)
void btrfs_init_workspace_manager(struct workspace_manager *wsm,
				  const struct btrfs_compress_op *ops)
{
	struct list_head *workspace;
	int i;

	INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
	spin_lock_init(&btrfs_heuristic_ws.ws_lock);
	atomic_set(&btrfs_heuristic_ws.total_ws, 0);
	init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);

	workspace = alloc_heuristic_ws();
	if (IS_ERR(workspace)) {
		pr_warn(
	"BTRFS: cannot preallocate heuristic workspace, will try later\n");
	} else {
		atomic_set(&btrfs_heuristic_ws.total_ws, 1);
		btrfs_heuristic_ws.free_ws = 1;
		list_add(workspace, &btrfs_heuristic_ws.idle_ws);
	}
	wsm->ops = ops;

	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
	INIT_LIST_HEAD(&wsm->idle_ws);
	spin_lock_init(&wsm->ws_lock);
	atomic_set(&wsm->total_ws, 0);
	init_waitqueue_head(&wsm->ws_wait);

	/*
		 * Preallocate one workspace for each compression type so
		 * we can guarantee forward progress in the worst case
	 * Preallocate one workspace for each compression type so we can
	 * guarantee forward progress in the worst case
	 */
		workspace = btrfs_compress_op[i]->alloc_workspace();
	workspace = wsm->ops->alloc_workspace(0);
	if (IS_ERR(workspace)) {
			pr_warn("BTRFS: cannot preallocate compression workspace, will try later\n");
		pr_warn(
	"BTRFS: cannot preallocate compression workspace, will try later\n");
	} else {
			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
			btrfs_comp_ws[i].free_ws = 1;
			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
		atomic_set(&wsm->total_ws, 1);
		wsm->free_ws = 1;
		list_add(workspace, &wsm->idle_ws);
	}
}

void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
{
	struct list_head *ws;

	while (!list_empty(&wsman->idle_ws)) {
		ws = wsman->idle_ws.next;
		list_del(ws);
		wsman->ops->free_workspace(ws);
		atomic_dec(&wsman->total_ws);
	}
}

/*
@@ -837,11 +853,11 @@ void __init btrfs_init_compress(void)
 * Preallocation makes a forward progress guarantees and we do not return
 * errors.
 */
static struct list_head *__find_workspace(int type, bool heuristic)
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
				      unsigned int level)
{
	struct list_head *workspace;
	int cpus = num_online_cpus();
	int idx = type - 1;
	unsigned nofs_flag;
	struct list_head *idle_ws;
	spinlock_t *ws_lock;
@@ -849,19 +865,11 @@ static struct list_head *__find_workspace(int type, bool heuristic)
	wait_queue_head_t *ws_wait;
	int *free_ws;

	if (heuristic) {
		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
		total_ws = &btrfs_heuristic_ws.total_ws;
		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
		free_ws	 = &btrfs_heuristic_ws.free_ws;
	} else {
		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
		total_ws = &btrfs_comp_ws[idx].total_ws;
		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
		free_ws	 = &btrfs_comp_ws[idx].free_ws;
	}
	idle_ws	 = &wsm->idle_ws;
	ws_lock	 = &wsm->ws_lock;
	total_ws = &wsm->total_ws;
	ws_wait	 = &wsm->ws_wait;
	free_ws	 = &wsm->free_ws;

again:
	spin_lock(ws_lock);
@@ -892,10 +900,7 @@ again:
	 * context of btrfs_compress_bio/btrfs_compress_pages
	 */
	nofs_flag = memalloc_nofs_save();
	if (heuristic)
		workspace = alloc_heuristic_ws();
	else
		workspace = btrfs_compress_op[idx]->alloc_workspace();
	workspace = wsm->ops->alloc_workspace(level);
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(workspace)) {
@@ -926,85 +931,47 @@ again:
	return workspace;
}

static struct list_head *find_workspace(int type)
static struct list_head *get_workspace(int type, int level)
{
	return __find_workspace(type, false);
	return btrfs_compress_op[type]->get_workspace(level);
}

/*
 * put a workspace struct back on the list or free it if we have enough
 * idle ones sitting around
 */
static void __free_workspace(int type, struct list_head *workspace,
			     bool heuristic)
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
{
	int idx = type - 1;
	struct list_head *idle_ws;
	spinlock_t *ws_lock;
	atomic_t *total_ws;
	wait_queue_head_t *ws_wait;
	int *free_ws;

	if (heuristic) {
		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
		total_ws = &btrfs_heuristic_ws.total_ws;
		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
		free_ws	 = &btrfs_heuristic_ws.free_ws;
	} else {
		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
		total_ws = &btrfs_comp_ws[idx].total_ws;
		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
		free_ws	 = &btrfs_comp_ws[idx].free_ws;
	}
	idle_ws	 = &wsm->idle_ws;
	ws_lock	 = &wsm->ws_lock;
	total_ws = &wsm->total_ws;
	ws_wait	 = &wsm->ws_wait;
	free_ws	 = &wsm->free_ws;

	spin_lock(ws_lock);
	if (*free_ws <= num_online_cpus()) {
		list_add(workspace, idle_ws);
		list_add(ws, idle_ws);
		(*free_ws)++;
		spin_unlock(ws_lock);
		goto wake;
	}
	spin_unlock(ws_lock);

	if (heuristic)
		free_heuristic_ws(workspace);
	else
		btrfs_compress_op[idx]->free_workspace(workspace);
	wsm->ops->free_workspace(ws);
	atomic_dec(total_ws);
wake:
	cond_wake_up(ws_wait);
}

static void free_workspace(int type, struct list_head *ws)
static void put_workspace(int type, struct list_head *ws)
{
	return __free_workspace(type, ws, false);
}

/*
 * cleanup function for module exit
 */
static void free_workspaces(void)
{
	struct list_head *workspace;
	int i;

	while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
		workspace = btrfs_heuristic_ws.idle_ws.next;
		list_del(workspace);
		free_heuristic_ws(workspace);
		atomic_dec(&btrfs_heuristic_ws.total_ws);
	}

	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
		while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
			workspace = btrfs_comp_ws[i].idle_ws.next;
			list_del(workspace);
			btrfs_compress_op[i]->free_workspace(workspace);
			atomic_dec(&btrfs_comp_ws[i].total_ws);
		}
	}
	return btrfs_compress_op[type]->put_workspace(ws);
}

/*
@@ -1036,18 +1003,17 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
			 unsigned long *total_in,
			 unsigned long *total_out)
{
	int type = btrfs_compress_type(type_level);
	int level = btrfs_compress_level(type_level);
	struct list_head *workspace;
	int ret;
	int type = type_level & 0xF;

	workspace = find_workspace(type);

	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
	workspace = get_workspace(type, level);
	ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
						      start, pages,
						      out_pages,
						      total_in, total_out);
	free_workspace(type, workspace);
	put_workspace(type, workspace);
	return ret;
}

@@ -1071,9 +1037,9 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
	int ret;
	int type = cb->compress_type;

	workspace = find_workspace(type);
	ret = btrfs_compress_op[type - 1]->decompress_bio(workspace, cb);
	free_workspace(type, workspace);
	workspace = get_workspace(type, 0);
	ret = btrfs_compress_op[type]->decompress_bio(workspace, cb);
	put_workspace(type, workspace);

	return ret;
}
@@ -1089,19 +1055,29 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
	struct list_head *workspace;
	int ret;

	workspace = find_workspace(type);

	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
	workspace = get_workspace(type, 0);
	ret = btrfs_compress_op[type]->decompress(workspace, data_in,
						  dest_page, start_byte,
						  srclen, destlen);
	put_workspace(type, workspace);

	free_workspace(type, workspace);
	return ret;
}

/*
 * Run the init_workspace_manager hook of each of the
 * BTRFS_NR_WORKSPACE_MANAGERS entries in btrfs_compress_op[], in index order.
 */
void __init btrfs_init_compress(void)
{
	int idx = 0;

	while (idx < BTRFS_NR_WORKSPACE_MANAGERS) {
		btrfs_compress_op[idx]->init_workspace_manager();
		idx++;
	}
}

void __cold btrfs_exit_compress(void)
{
	free_workspaces();
	int i;

	for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
		btrfs_compress_op[i]->cleanup_workspace_manager();
}

/*
@@ -1512,7 +1488,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
 */
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
{
	struct list_head *ws_list = __find_workspace(0, true);
	struct list_head *ws_list = get_workspace(0, 0);
	struct heuristic_ws *ws;
	u32 i;
	u8 byte;
@@ -1581,18 +1557,29 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
	}

out:
	__free_workspace(0, ws_list, true);
	put_workspace(0, ws_list);
	return ret;
}

unsigned int btrfs_compress_str2level(const char *str)
/*
 * Convert the compression suffix (eg. after "zlib" starting with ":") to
 * level, unrecognized string will set the default level
 */
unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
{
	if (strncmp(str, "zlib", 4) != 0)
	unsigned int level = 0;
	int ret;

	if (!type)
		return 0;

	/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
		return str[5] - '0';
	if (str[0] == ':') {
		ret = kstrtouint(str + 1, 10, &level);
		if (ret)
			level = 0;
	}

	level = btrfs_compress_op[type]->set_level(level);

	return BTRFS_ZLIB_DEFAULT_LEVEL;
	return level;
}
+49 −3
Original line number Diff line number Diff line
@@ -64,6 +64,16 @@ struct compressed_bio {
	u32 sums;
};

/* Return the compression type, i.e. the low four bits of @type_level. */
static inline unsigned int btrfs_compress_type(unsigned int type_level)
{
	const unsigned int type_mask = 0xF;

	return type_level & type_mask;
}

/* Return the compression level, i.e. bits 4-7 of @type_level. */
static inline unsigned int btrfs_compress_level(unsigned int type_level)
{
	const unsigned int level_shift = 4;
	const unsigned int level_mask = 0xF;

	return (type_level >> level_shift) & level_mask;
}

void __init btrfs_init_compress(void);
void __cold btrfs_exit_compress(void);

@@ -87,7 +97,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
				 int mirror_num, unsigned long bio_flags);

unsigned btrfs_compress_str2level(const char *str);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);

enum btrfs_compression_type {
	BTRFS_COMPRESS_NONE  = 0,
@@ -97,8 +107,35 @@ enum btrfs_compression_type {
	BTRFS_COMPRESS_TYPES = 3,
};

/*
 * Bookkeeping for a pool of workspaces. The btrfs_*_workspace_manager() and
 * btrfs_{get,put}_workspace() helpers declared below operate on it.
 */
struct workspace_manager {
	/* Operations table stored by btrfs_init_workspace_manager() */
	const struct btrfs_compress_op *ops;
	/* List of workspaces that are currently idle */
	struct list_head idle_ws;
	/* NOTE(review): presumably protects idle_ws and free_ws - confirm */
	spinlock_t ws_lock;
	/* Number of free workspaces */
	int free_ws;
	/* Total number of allocated workspaces */
	atomic_t total_ws;
	/* Waiters for a free workspace */
	wait_queue_head_t ws_wait;
};

void btrfs_init_workspace_manager(struct workspace_manager *wsm,
				  const struct btrfs_compress_op *ops);
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
				      unsigned int level);
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws);
void btrfs_cleanup_workspace_manager(struct workspace_manager *wsm);

struct btrfs_compress_op {
	struct list_head *(*alloc_workspace)(void);
	void (*init_workspace_manager)(void);

	void (*cleanup_workspace_manager)(void);

	struct list_head *(*get_workspace)(unsigned int level);

	void (*put_workspace)(struct list_head *ws);

	struct list_head *(*alloc_workspace)(unsigned int level);

	void (*free_workspace)(struct list_head *workspace);

@@ -119,9 +156,18 @@ struct btrfs_compress_op {
			  unsigned long start_byte,
			  size_t srclen, size_t destlen);

	void (*set_level)(struct list_head *ws, unsigned int type);
	/*
	 * This bounds the level set by the user to be within range of a
	 * particular compression type.  It returns the level that will be used
	 * if the level is out of bounds or the default if 0 is passed in.
	 */
	unsigned int (*set_level)(unsigned int level);
};

/* The heuristic workspaces are managed via the 0th workspace manager */
#define BTRFS_NR_WORKSPACE_MANAGERS	(BTRFS_COMPRESS_TYPES + 1)

extern const struct btrfs_compress_op btrfs_heuristic_compress;
extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress;
Loading