Commit 97d0bf96 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "User visible changes:
   - new block group profiles: RAID1 with 3- and 4- copies
       - RAID1 in btrfs has always 2 copies, now add support for 3 and 4
       - this is an incompat feature (named RAID1C34)
       - recommended use of RAID1C3 is replacement of RAID6 profile on
         metadata, this brings a more reliable resiliency against 2
         device loss/damage

   - support for new checksums
       - per-filesystem, set at mkfs time
       - fast hash (crc32c successor): xxhash, 64bit digest
       - strong hashes (both 256bit): sha256 (slower, FIPS), blake2b
         (faster)
       - the blake2b module goes via the crypto tree, btrfs.ko has a
         soft dependency

   - speed up lseek, don't take inode locks unnecessarily, this can
     speed up parallel SEEK_CUR/SEEK_SET/SEEK_END by 80%

   - send:
       - allow clone operations within the same file
       - limit maximum number of sent clone references to avoid slow
         backref walking

   - error message improvements: device scan prints process name and PID

  Core changes:
   - cleanups
       - remove unique workqueue helpers, used to provide a way to avoid
         deadlocks in the workqueue code, now done in a simpler way
       - remove lots of indirect function calls in compression code
       - extent IO tree code moved out of extent_io.c
       - cleanup backup superblock handling at mount time
       - transaction life cycle documentation and cleanups
       - locking code cleanups, annotations and documentation
       - add more cold, const, pure function attributes
       - removal of unused or redundant struct members or variables

   - new tree-checker sanity tests
       - try to detect missing INODE_ITEM, cross-reference checks of
         DIR_ITEM, DIR_INDEX, INODE_REF, and XATTR_* items

   - remove own bio scheduling code (used to avoid checksum submissions
     being stuck behind other IO), replaced by cgroup controller-based
     code to allow better control and avoid priority inversions in cases
     where the custom and cgroup scheduling disagreed

  Fixes:
   - avoid getting stuck during cyclic writebacks

   - fix trimming of ranges crossing block group boundaries

   - fix rename exchange on subvolumes, all involved subvolumes need to
     be recorded in the transaction"

* tag 'for-5.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (137 commits)
  btrfs: drop bdev argument from submit_extent_page
  btrfs: remove extent_map::bdev
  btrfs: drop bio_set_dev where not needed
  btrfs: get bdev directly from fs_devices in submit_extent_page
  btrfs: record all roots for rename exchange on a subvol
  Btrfs: fix block group remaining RO forever after error during device replace
  btrfs: scrub: Don't check free space before marking a block group RO
  btrfs: change btrfs_fs_devices::rotating to bool
  btrfs: change btrfs_fs_devices::seeding to bool
  btrfs: rename btrfs_block_group_cache
  btrfs: block-group: Reuse the item key from caller of read_one_block_group()
  btrfs: block-group: Refactor btrfs_read_block_groups()
  btrfs: document extent buffer locking
  btrfs: access eb::blocking_writers according to ACCESS_ONCE policies
  btrfs: set blocking_writers directly, no increment or decrement
  btrfs: merge blocking_writers branches in btrfs_tree_read_lock
  btrfs: drop incompat bit for raid1c34 after last block group is gone
  btrfs: add incompat for raid1 with 3, 4 copies
  btrfs: add support for 4-copy replication (raid1c4)
  btrfs: add support for 3-copy replication (raid1c3)
  ...
parents 1b88176b fa17ed06
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@ config BTRFS_FS
	select CRYPTO
	select CRYPTO_CRC32C
	select LIBCRC32C
	select CRYPTO_XXHASH
	select CRYPTO_SHA256
	select ZLIB_INFLATE
	select ZLIB_DEFLATE
	select LZO_COMPRESS
+53 −60
Original line number Diff line number Diff line
@@ -53,24 +53,12 @@ struct btrfs_workqueue {
	struct __btrfs_workqueue *high;
};

static void normal_work_helper(struct btrfs_work *work);

#define BTRFS_WORK_HELPER(name)					\
noinline_for_stack void btrfs_##name(struct work_struct *arg)		\
{									\
	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
					       normal_work);		\
	normal_work_helper(work);					\
}

struct btrfs_fs_info *
btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
{
	return wq->fs_info;
}

struct btrfs_fs_info *
btrfs_work_owner(const struct btrfs_work *work)
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work)
{
	return work->wq->fs_info;
}
@@ -89,29 +77,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
	return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
}

BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
BTRFS_WORK_HELPER(cache_helper);
BTRFS_WORK_HELPER(submit_helper);
BTRFS_WORK_HELPER(fixup_helper);
BTRFS_WORK_HELPER(endio_helper);
BTRFS_WORK_HELPER(endio_meta_helper);
BTRFS_WORK_HELPER(endio_meta_write_helper);
BTRFS_WORK_HELPER(endio_raid56_helper);
BTRFS_WORK_HELPER(endio_repair_helper);
BTRFS_WORK_HELPER(rmw_helper);
BTRFS_WORK_HELPER(endio_write_helper);
BTRFS_WORK_HELPER(freespace_write_helper);
BTRFS_WORK_HELPER(delayed_meta_helper);
BTRFS_WORK_HELPER(readahead_helper);
BTRFS_WORK_HELPER(qgroup_rescan_helper);
BTRFS_WORK_HELPER(extent_refs_helper);
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
BTRFS_WORK_HELPER(scrubparity_helper);

static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
			unsigned int flags, int limit_active, int thresh)
@@ -252,16 +217,16 @@ out:
	}
}

static void run_ordered_work(struct __btrfs_workqueue *wq)
static void run_ordered_work(struct __btrfs_workqueue *wq,
			     struct btrfs_work *self)
{
	struct list_head *list = &wq->ordered_list;
	struct btrfs_work *work;
	spinlock_t *lock = &wq->list_lock;
	unsigned long flags;
	bool free_self = false;

	while (1) {
		void *wtag;

		spin_lock_irqsave(lock, flags);
		if (list_empty(list))
			break;
@@ -287,22 +252,53 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
		list_del(&work->ordered_list);
		spin_unlock_irqrestore(lock, flags);

		if (work == self) {
			/*
		 * We don't want to call the ordered free functions with the
		 * lock held though. Save the work as tag for the trace event,
		 * because the callback could free the structure.
			 * This is the work item that the worker is currently
			 * executing.
			 *
			 * The kernel workqueue code guarantees non-reentrancy
			 * of work items. I.e., if a work item with the same
			 * address and work function is queued twice, the second
			 * execution is blocked until the first one finishes. A
			 * work item may be freed and recycled with the same
			 * work function; the workqueue code assumes that the
			 * original work item cannot depend on the recycled work
			 * item in that case (see find_worker_executing_work()).
			 *
			 * Note that different types of Btrfs work can depend on
			 * each other, and one type of work on one Btrfs
			 * filesystem may even depend on the same type of work
			 * on another Btrfs filesystem via, e.g., a loop device.
			 * Therefore, we must not allow the current work item to
			 * be recycled until we are really done, otherwise we
			 * break the above assumption and can deadlock.
			 */
			free_self = true;
		} else {
			/*
			 * We don't want to call the ordered free functions with
			 * the lock held.
			 */
		wtag = work;
			work->ordered_free(work);
		trace_btrfs_all_work_done(wq->fs_info, wtag);
			/* NB: work must not be dereferenced past this point. */
			trace_btrfs_all_work_done(wq->fs_info, work);
		}
	}
	spin_unlock_irqrestore(lock, flags);

	if (free_self) {
		self->ordered_free(self);
		/* NB: self must not be dereferenced past this point. */
		trace_btrfs_all_work_done(wq->fs_info, self);
	}
}

static void normal_work_helper(struct btrfs_work *work)
static void btrfs_work_helper(struct work_struct *normal_work)
{
	struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
					       normal_work);
	struct __btrfs_workqueue *wq;
	void *wtag;
	int need_order = 0;

	/*
@@ -316,29 +312,26 @@ static void normal_work_helper(struct btrfs_work *work)
	if (work->ordered_func)
		need_order = 1;
	wq = work->wq;
	/* Safe for tracepoints in case work gets freed by the callback */
	wtag = work;

	trace_btrfs_work_sched(work);
	thresh_exec_hook(wq);
	work->func(work);
	if (need_order) {
		set_bit(WORK_DONE_BIT, &work->flags);
		run_ordered_work(wq);
		run_ordered_work(wq, work);
	} else {
		/* NB: work must not be dereferenced past this point. */
		trace_btrfs_all_work_done(wq->fs_info, work);
	}
	if (!need_order)
		trace_btrfs_all_work_done(wq->fs_info, wtag);
}

void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free)
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
		     btrfs_func_t ordered_func, btrfs_func_t ordered_free)
{
	work->func = func;
	work->ordered_func = ordered_func;
	work->ordered_free = ordered_free;
	INIT_WORK(&work->normal_work, uniq_func);
	INIT_WORK(&work->normal_work, btrfs_work_helper);
	INIT_LIST_HEAD(&work->ordered_list);
	work->flags = 0;
}
+4 −33
Original line number Diff line number Diff line
@@ -29,49 +29,20 @@ struct btrfs_work {
	unsigned long flags;
};

#define BTRFS_WORK_HELPER_PROTO(name)					\
void btrfs_##name(struct work_struct *arg)

BTRFS_WORK_HELPER_PROTO(worker_helper);
BTRFS_WORK_HELPER_PROTO(delalloc_helper);
BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
BTRFS_WORK_HELPER_PROTO(cache_helper);
BTRFS_WORK_HELPER_PROTO(submit_helper);
BTRFS_WORK_HELPER_PROTO(fixup_helper);
BTRFS_WORK_HELPER_PROTO(endio_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
BTRFS_WORK_HELPER_PROTO(rmw_helper);
BTRFS_WORK_HELPER_PROTO(endio_write_helper);
BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
BTRFS_WORK_HELPER_PROTO(readahead_helper);
BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
BTRFS_WORK_HELPER_PROTO(scrub_helper);
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
BTRFS_WORK_HELPER_PROTO(scrubparity_helper);


struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
					      const char *name,
					      unsigned int flags,
					      int limit_active,
					      int thresh);
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
		     btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
		      struct btrfs_work *work);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);

#endif
+302 −287

File changed.

Preview size limit exceeded, changes collapsed.

+26 −25
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ struct btrfs_caching_control {
	struct mutex mutex;
	wait_queue_head_t wait;
	struct btrfs_work work;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_block_group *block_group;
	u64 progress;
	refcount_t count;
};
@@ -42,14 +42,15 @@ struct btrfs_caching_control {
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M

struct btrfs_block_group_cache {
	struct btrfs_key key;
	struct btrfs_block_group_item item;
struct btrfs_block_group {
	struct btrfs_fs_info *fs_info;
	struct inode *inode;
	spinlock_t lock;
	u64 start;
	u64 length;
	u64 pinned;
	u64 reserved;
	u64 used;
	u64 delalloc_bytes;
	u64 bytes_super;
	u64 flags;
@@ -159,7 +160,7 @@ struct btrfs_block_group_cache {

#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
		struct btrfs_block_group_cache *block_group)
		struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;

@@ -170,29 +171,29 @@ static inline int btrfs_should_fragment_free_space(
}
#endif

struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
struct btrfs_block_group *btrfs_lookup_first_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_block_group *btrfs_lookup_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group_cache *btrfs_next_block_group(
		struct btrfs_block_group_cache *cache);
void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
struct btrfs_block_group *btrfs_next_block_group(
		struct btrfs_block_group *cache);
void btrfs_get_block_group(struct btrfs_block_group *cache);
void btrfs_put_block_group(struct btrfs_block_group *cache);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
					const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
				           u64 num_bytes);
int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache);
int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
int btrfs_cache_block_group(struct btrfs_block_group *cache,
			    int load_cache_only);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
		struct btrfs_block_group_cache *cache);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		struct btrfs_block_group *cache);
u64 add_new_free_space(struct btrfs_block_group *block_group,
		       u64 start, u64 end);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				struct btrfs_fs_info *fs_info,
@@ -200,21 +201,22 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
			   u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
			     bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, int alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
			     u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
			       u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
		      enum btrfs_chunk_alloc_enum force);
@@ -239,8 +241,7 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}

static inline int btrfs_block_group_cache_done(
		struct btrfs_block_group_cache *cache)
static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
Loading