Commit 84399bb0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from Chris Mason:
 "Outside of misc fixes, Filipe has a few fsync corners and we're
  pulling in one more of Josef's fixes from production use here"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs:__add_inode_ref: out of bounds memory read when looking for extended ref.
  Btrfs: fix data loss in the fast fsync path
  Btrfs: remove extra run_delayed_refs in update_cowonly_root
  Btrfs: incremental send, don't rename a directory too soon
  btrfs: fix lost return value due to variable shadowing
  Btrfs: do not ignore errors from btrfs_lookup_xattr in do_setxattr
  Btrfs: fix off-by-one logic error in btrfs_realloc_node
  Btrfs: add missing inode update when punching hole
  Btrfs: abort the transaction if we fail to update the free space cache inode
  Btrfs: fix fsync race leading to ordered extent memory leaks
parents 0d9b9c16 dd9ef135
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = root->nodesize;
	end_slot = parent_nritems;
	end_slot = parent_nritems - 1;

	if (parent_nritems == 1)
	if (parent_nritems <= 1)
		return 0;

	btrfs_set_lock_blocking(parent);

	for (i = start_slot; i < end_slot; i++) {
	for (i = start_slot; i <= end_slot; i++) {
		int close = 1;

		btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
		if (!close && i < end_slot) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
+16 −0
Original line number Diff line number Diff line
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
		return 0;
	}

	if (trans->aborted)
		return 0;
again:
	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
	 */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our exposure to horrible edge cases let's just abort the
		 * transaction, this only happens in really bad situations
		 * anyway.
		 */
		btrfs_abort_transaction(trans, root, ret);
		goto out_put;
	}
	WARN_ON(ret);

	if (i_size_read(inode) > 0) {
+56 −31
Original line number Diff line number Diff line
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
	mutex_unlock(&inode->i_mutex);

	/*
	 * we want to make sure fsync finds this change
	 * but we haven't joined a transaction running right now.
	 *
	 * Later on, someone is sure to update the inode and get the
	 * real transid recorded.
	 *
	 * We set last_trans now to the fs_info generation + 1,
	 * this will either be one more than the running transaction
	 * or the generation used for the next transaction if there isn't
	 * one running right now.
	 *
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	if (num_written > 0) {
		err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
	atomic_inc(&root->log_batch);

	/*
	 * check the transaction that last modified this inode
	 * and see if its already been committed
	 */
	if (!BTRFS_I(inode)->last_trans) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	/*
	 * if the last transaction that changed this file was before
	 * the current transaction, we can bail out now without any
	 * syncing
	 * If the last transaction that changed this file was before the current
	 * transaction and we have the full sync flag set in our inode, we can
	 * bail out now without any syncing.
	 *
	 * Note that we can't bail out if the full sync flag isn't set. This is
	 * because when the full sync flag is set we start all ordered extents
	 * and wait for them to fully complete - when they complete they update
	 * the inode's last_trans field through:
	 *
	 *     btrfs_finish_ordered_io() ->
	 *         btrfs_update_inode_fallback() ->
	 *             btrfs_update_inode() ->
	 *                 btrfs_set_inode_last_trans()
	 *
	 * So we are sure that last_trans is up to date and can do this check to
	 * bail out safely. For the fast path, when the full sync flag is not
	 * set in our inode, we can not do it because we start only our ordered
	 * extents and don't wait for them to complete (that is when
	 * btrfs_finish_ordered_io runs), so here at this point their last_trans
	 * value might be less than or equal to fs_info->last_trans_committed,
	 * and setting a speculative last_trans for an inode when a buffered
	 * write is made (such as fs_info->generation + 1 for example) would not
	 * be reliable since after setting the value and before fsync is called
	 * any number of transactions can start and commit (transaction kthread
	 * commits the current transaction periodically), and a transaction
	 * commit neither starts nor waits for ordered extents to complete.
	 */
	smp_mb();
	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
	    BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
		BTRFS_I(inode)->last_trans = 0;

	    (full_sync && BTRFS_I(inode)->last_trans <=
	     root->fs_info->last_trans_committed)) {
		/*
		 * We've had everything committed since the last time we were
		 * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	bool same_page;
	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
	u64 ino_size;
	bool truncated_page = false;
	bool updated_inode = false;

	ret = btrfs_wait_ordered_range(inode, offset, len);
	if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	 * entire page.
	 */
	if (same_page && len < PAGE_CACHE_SIZE) {
		if (offset < ino_size)
		if (offset < ino_size) {
			truncated_page = true;
			ret = btrfs_truncate_page(inode, offset, len, 0);
		} else {
			ret = 0;
		}
		goto out_only_mutex;
	}

	/* zero back part of the first page */
	if (offset < ino_size) {
		truncated_page = true;
		ret = btrfs_truncate_page(inode, offset, 0, 0);
		if (ret) {
			mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
		if (!ret) {
			/* zero the front end of the last page */
			if (tail_start + tail_len < ino_size) {
				truncated_page = true;
				ret = btrfs_truncate_page(inode,
						tail_start + tail_len, 0, 1);
				if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	}

	if (lockend < lockstart) {
		mutex_unlock(&inode->i_mutex);
		return 0;
		ret = 0;
		goto out_only_mutex;
	}

	while (1) {
@@ -2506,6 +2514,7 @@ out_trans:

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
out_free:
@@ -2515,6 +2524,22 @@ out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	if (!updated_inode && truncated_page && !ret && !err) {
		/*
		 * If we only end up zeroing part of a page, we still need to
		 * update the inode item, so that all the time fields are
		 * updated as well as the necessary btrfs inode in memory fields
		 * for detecting, at fsync time, if the inode isn't yet in the
		 * log tree or it's there but not up to date.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans, root);
		}
	}
	mutex_unlock(&inode->i_mutex);
	if (ret && !err)
		err = ret;
+0 −1
Original line number Diff line number Diff line
@@ -7285,7 +7285,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		int ret;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+2 −5
Original line number Diff line number Diff line
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
			continue;
		if (entry_end(ordered) <= start)
			break;
		if (!list_empty(&ordered->log_list))
			continue;
		if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
			continue;
		list_add(&ordered->log_list, logged_list);
		atomic_inc(&ordered->refs);
@@ -511,7 +509,6 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
						   &ordered->flags));

		if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
		list_add_tail(&ordered->trans_list, &trans->ordered);
		spin_lock_irq(&log->log_extents_lock[index]);
	}
Loading