Commit d41bb034 authored by Dave Chinner's avatar Dave Chinner
Browse files

Merge branch 'xfs-rename-whiteout' into for-next

Conflicts:
	fs/xfs/xfs_inode.c
parents 88e8fda9 7dcf5c3e
Loading
Loading
Loading
Loading
+238 −170
Original line number Diff line number Diff line
@@ -391,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
}

/*
 * The following routine will lock n inodes in exclusive mode.
 * We assume the caller calls us with the inodes in i_ino order.
 * The following routine will lock n inodes in exclusive mode.  We assume the
 * caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock
 * is in the AIL and we start waiting for another inode that is locked
 * by a thread in a long running transaction (such as truncate). This can
 * result in deadlock since the long running trans might need to wait
 * for the inode we just locked in order to push the tail and free space
 * in the log.
 * We need to detect deadlock where an inode that we lock is in the AIL and we
 * start waiting for another inode that is locked by a thread in a long running
 * transaction (such as truncate). This can result in deadlock since the long
 * running trans might need to wait for the inode we just locked in order to
 * push the tail and free space in the log.
 */
void
xfs_lock_inodes(
@@ -410,11 +409,11 @@ xfs_lock_inodes(
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */
	/* currently supports between 2 and 5 inodes */
	ASSERT(ips && inodes >= 2 && inodes <= 5);

	try_lock = 0;
	i = 0;

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);
@@ -423,19 +422,16 @@ again:
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
		 * If try_lock is not set yet, make sure all locked inodes are
		 * not in the AIL.  If any are, set try_lock to be used later.
		 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				if (lp && (lp->li_flags & XFS_LI_IN_AIL))
					try_lock++;
			}
		}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
@@ -443,33 +439,28 @@ again:
		 * we can't get any, we must release all we have
		 * and try again.
		 */
		if (!try_lock) {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
			continue;
		}

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
		/* try_lock means we have an inode locked that is in the AIL. */
		ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				attempts++;
		if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
			continue;

		/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
		 * Unlock all previous guys and try again.  xfs_iunlock will try
		 * to push the tail if the inode is in the AIL.
		 */

		attempts++;
		for (j = i - 1; j >= 0; j--) {

			/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
			 * Check to see if we've already unlocked this one.  Not
			 * the first one going back, and the inode ptr is the
			 * same.
			 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
			if (j != (i - 1) && ips[j] == ips[j + 1])
				continue;

			xfs_iunlock(ips[j], lock_mode);
@@ -485,10 +476,6 @@ again:
		try_lock = 0;
		goto again;
	}
		} else {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
		}
	}

#ifdef DEBUG
	if (attempts) {
@@ -2681,19 +2668,22 @@ xfs_remove(
/*
 * Enter all inodes for a rename transaction into a sorted array.
 */
#define __XFS_SORT_INODES	5
STATIC void
xfs_sort_for_rename(
	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
	xfs_inode_t	*ip1,	/* in: inode of old entry */
	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
				   already exists, NULL otherwise. */
	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
	int		*num_inodes)  /* out: number of inodes in array */
	struct xfs_inode	*dp1,	/* in: old (source) directory inode */
	struct xfs_inode	*dp2,	/* in: new (target) directory inode */
	struct xfs_inode	*ip1,	/* in: inode of old entry */
	struct xfs_inode	*ip2,	/* in: inode of new entry */
	struct xfs_inode	*wip,	/* in: whiteout inode */
	struct xfs_inode	**i_tab,/* out: sorted array of inodes */
	int			*num_inodes)  /* in/out: inodes in array */
{
	xfs_inode_t		*temp;
	int			i, j;

	ASSERT(*num_inodes == __XFS_SORT_INODES);
	memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));

	/*
	 * i_tab contains a list of pointers to inodes.  We initialize
	 * the table here & we'll sort it.  We will then use it to
@@ -2701,25 +2691,24 @@ xfs_sort_for_rename(
	 *
	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
	 */
	i_tab[0] = dp1;
	i_tab[1] = dp2;
	i_tab[2] = ip1;
	if (ip2) {
		*num_inodes = 4;
		i_tab[3] = ip2;
	} else {
		*num_inodes = 3;
		i_tab[3] = NULL;
	}
	i = 0;
	i_tab[i++] = dp1;
	i_tab[i++] = dp2;
	i_tab[i++] = ip1;
	if (ip2)
		i_tab[i++] = ip2;
	if (wip)
		i_tab[i++] = wip;
	*num_inodes = i;

	/*
	 * Sort the elements via bubble sort.  (Remember, there are at
	 * most 4 elements to sort, so this is adequate.)
	 * most 5 elements to sort, so this is adequate.)
	 */
	for (i = 0; i < *num_inodes; i++) {
		for (j = 1; j < *num_inodes; j++) {
			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
				temp = i_tab[j];
				struct xfs_inode *temp = i_tab[j];
				i_tab[j] = i_tab[j-1];
				i_tab[j-1] = temp;
			}
@@ -2727,6 +2716,31 @@ xfs_sort_for_rename(
	}
}

static int
xfs_finish_rename(
	struct xfs_trans	*tp,
	struct xfs_bmap_free	*free_list)
{
	int			committed = 0;
	int			error;

	/*
	 * If this is a synchronous mount, make sure that the rename transaction
	 * goes to disk before returning to the user.
	 */
	if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	error = xfs_bmap_finish(&tp, free_list, &committed);
	if (error) {
		xfs_bmap_cancel(free_list);
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
		return error;
	}

	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
}

/*
 * xfs_cross_rename()
 *
@@ -2755,14 +2769,14 @@ xfs_cross_rename(
				ip2->i_ino,
				first_block, free_list, spaceres);
	if (error)
		goto out;
		goto out_trans_abort;

	/* Swap inode number for dirent in second parent */
	error = xfs_dir_replace(tp, dp2, name2,
				ip1->i_ino,
				first_block, free_list, spaceres);
	if (error)
		goto out;
		goto out_trans_abort;

	/*
	 * If we're renaming one or more directories across different parents,
@@ -2777,16 +2791,16 @@ xfs_cross_rename(
						dp1->i_ino, first_block,
						free_list, spaceres);
			if (error)
				goto out;
				goto out_trans_abort;

			/* transfer ip2 ".." reference to dp1 */
			if (!S_ISDIR(ip1->i_d.di_mode)) {
				error = xfs_droplink(tp, dp2);
				if (error)
					goto out;
					goto out_trans_abort;
				error = xfs_bumplink(tp, dp1);
				if (error)
					goto out;
					goto out_trans_abort;
			}

			/*
@@ -2804,16 +2818,16 @@ xfs_cross_rename(
						dp2->i_ino, first_block,
						free_list, spaceres);
			if (error)
				goto out;
				goto out_trans_abort;

			/* transfer ip1 ".." reference to dp2 */
			if (!S_ISDIR(ip2->i_d.di_mode)) {
				error = xfs_droplink(tp, dp1);
				if (error)
					goto out;
					goto out_trans_abort;
				error = xfs_bumplink(tp, dp2);
				if (error)
					goto out;
					goto out_trans_abort;
			}

			/*
@@ -2841,66 +2855,108 @@ xfs_cross_rename(
	}
	xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
out:
	return xfs_finish_rename(tp, free_list);

out_trans_abort:
	xfs_bmap_cancel(free_list);
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	return error;
}

/*
 * xfs_rename_alloc_whiteout()
 *
 * Return a referenced, unlinked, unlocked inode that that can be used as a
 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
 * crash between allocating the inode and linking it into the rename transaction
 * recovery will free the inode and we won't leak it.
 */
static int
xfs_rename_alloc_whiteout(
	struct xfs_inode	*dp,
	struct xfs_inode	**wip)
{
	struct xfs_inode	*tmpfile;
	int			error;

	error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
	if (error)
		return error;

	/* Satisfy xfs_bumplink that this is a real tmpfile */
	xfs_finish_inode_setup(tmpfile);
	VFS_I(tmpfile)->i_state |= I_LINKABLE;

	*wip = tmpfile;
	return 0;
}

/*
 * xfs_rename
 */
int
xfs_rename(
	xfs_inode_t	*src_dp,
	struct xfs_inode	*src_dp,
	struct xfs_name		*src_name,
	xfs_inode_t	*src_ip,
	xfs_inode_t	*target_dp,
	struct xfs_inode	*src_ip,
	struct xfs_inode	*target_dp,
	struct xfs_name		*target_name,
	xfs_inode_t	*target_ip,
	struct xfs_inode	*target_ip,
	unsigned int		flags)
{
	xfs_trans_t	*tp = NULL;
	xfs_mount_t	*mp = src_dp->i_mount;
	int		new_parent;		/* moving to a new dir */
	int		src_is_directory;	/* src_name is a directory */
	int		error;
	xfs_bmap_free_t free_list;
	struct xfs_mount	*mp = src_dp->i_mount;
	struct xfs_trans	*tp;
	struct xfs_bmap_free	free_list;
	xfs_fsblock_t		first_block;
	int		cancel_flags;
	int		committed;
	xfs_inode_t	*inodes[4];
	struct xfs_inode	*wip = NULL;		/* whiteout inode */
	struct xfs_inode	*inodes[__XFS_SORT_INODES];
	int			num_inodes = __XFS_SORT_INODES;
	int			new_parent = (src_dp != target_dp);
	int			src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
	int			cancel_flags = 0;
	int			spaceres;
	int		num_inodes;
	int			error;

	trace_xfs_rename(src_dp, target_dp, src_name, target_name);

	new_parent = (src_dp != target_dp);
	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
	if ((flags & RENAME_EXCHANGE) && !target_ip)
		return -EINVAL;

	/*
	 * If we are doing a whiteout operation, allocate the whiteout inode
	 * we will be placing at the target and ensure the type is set
	 * appropriately.
	 */
	if (flags & RENAME_WHITEOUT) {
		ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
		error = xfs_rename_alloc_whiteout(target_dp, &wip);
		if (error)
			return error;

	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
		/* setup target dirent info as whiteout */
		src_name->type = XFS_DIR3_FT_CHRDEV;
	}

	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
				inodes, &num_inodes);

	xfs_bmap_init(&free_list, &first_block);
	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
	if (error == -ENOSPC) {
		spaceres = 0;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
	}
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto std_return;
	}
	if (error)
		goto out_trans_cancel;
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * Attach the dquots to the inodes
	 */
	error = xfs_qm_vop_rename_dqattach(inodes);
	if (error) {
		xfs_trans_cancel(tp, cancel_flags);
		goto std_return;
	}
	if (error)
		goto out_trans_cancel;

	/*
	 * Lock all the participating inodes. Depending upon whether
@@ -2921,6 +2977,8 @@ xfs_rename(
	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
	if (target_ip)
		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
	if (wip)
		xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);

	/*
	 * If we are using project inheritance, we only allow renames
@@ -2930,24 +2988,16 @@ xfs_rename(
	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
		error = -EXDEV;
		goto error_return;
		goto out_trans_cancel;
	}

	/*
	 * Handle RENAME_EXCHANGE flags
	 */
	if (flags & RENAME_EXCHANGE) {
		if (target_ip == NULL) {
			error = -EINVAL;
			goto error_return;
		}
		error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
	xfs_bmap_init(&free_list, &first_block);

	/* RENAME_EXCHANGE is unique from here on. */
	if (flags & RENAME_EXCHANGE)
		return xfs_cross_rename(tp, src_dp, src_name, src_ip,
					target_dp, target_name, target_ip,
					&free_list, &first_block, spaceres);
		if (error)
			goto abort_return;
		goto finish_rename;
	}

	/*
	 * Set up the target.
@@ -2960,7 +3010,7 @@ xfs_rename(
		if (!spaceres) {
			error = xfs_dir_canenter(tp, target_dp, target_name);
			if (error)
				goto error_return;
				goto out_trans_cancel;
		}
		/*
		 * If target does not exist and the rename crosses
@@ -2971,9 +3021,9 @@ xfs_rename(
						src_ip->i_ino, &first_block,
						&free_list, spaceres);
		if (error == -ENOSPC)
			goto error_return;
			goto out_bmap_cancel;
		if (error)
			goto abort_return;
			goto out_trans_abort;

		xfs_trans_ichgtime(tp, target_dp,
					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2981,7 +3031,7 @@ xfs_rename(
		if (new_parent && src_is_directory) {
			error = xfs_bumplink(tp, target_dp);
			if (error)
				goto abort_return;
				goto out_trans_abort;
		}
	} else { /* target_ip != NULL */
		/*
@@ -2996,7 +3046,7 @@ xfs_rename(
			if (!(xfs_dir_isempty(target_ip)) ||
			    (target_ip->i_d.di_nlink > 2)) {
				error = -EEXIST;
				goto error_return;
				goto out_trans_cancel;
			}
		}

@@ -3013,7 +3063,7 @@ xfs_rename(
					src_ip->i_ino,
					&first_block, &free_list, spaceres);
		if (error)
			goto abort_return;
			goto out_trans_abort;

		xfs_trans_ichgtime(tp, target_dp,
					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3024,7 +3074,7 @@ xfs_rename(
		 */
		error = xfs_droplink(tp, target_ip);
		if (error)
			goto abort_return;
			goto out_trans_abort;

		if (src_is_directory) {
			/*
@@ -3032,7 +3082,7 @@ xfs_rename(
			 */
			error = xfs_droplink(tp, target_ip);
			if (error)
				goto abort_return;
				goto out_trans_abort;
		}
	} /* target_ip != NULL */

@@ -3049,7 +3099,7 @@ xfs_rename(
					&first_block, &free_list, spaceres);
		ASSERT(error != -EEXIST);
		if (error)
			goto abort_return;
			goto out_trans_abort;
	}

	/*
@@ -3075,49 +3125,67 @@ xfs_rename(
		 */
		error = xfs_droplink(tp, src_dp);
		if (error)
			goto abort_return;
			goto out_trans_abort;
	}

	/*
	 * For whiteouts, we only need to update the source dirent with the
	 * inode number of the whiteout inode rather than removing it
	 * altogether.
	 */
	if (wip) {
		error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
					&first_block, &free_list, spaceres);
	} else
		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
					   &first_block, &free_list, spaceres);
	if (error)
		goto abort_return;
		goto out_trans_abort;

	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
	if (new_parent)
		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
	/*
	 * For whiteouts, we need to bump the link count on the whiteout inode.
	 * This means that failures all the way up to this point leave the inode
	 * on the unlinked list and so cleanup is a simple matter of dropping
	 * the remaining reference to it. If we fail here after bumping the link
	 * count, we're shutting down the filesystem so we'll never see the
	 * intermediate state on disk.
	 */
	if (wip) {
		ASSERT(wip->i_d.di_nlink == 0);
		error = xfs_bumplink(tp, wip);
		if (error)
			goto out_trans_abort;
		error = xfs_iunlink_remove(tp, wip);
		if (error)
			goto out_trans_abort;
		xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);

finish_rename:
		/*
	 * If this is a synchronous mount, make sure that the
	 * rename transaction goes to disk before returning to
	 * the user.
		 * Now we have a real link, clear the "I'm a tmpfile" state
		 * flag from the inode so it doesn't accidentally get misused in
		 * future.
		 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
		VFS_I(wip)->i_state &= ~I_LINKABLE;
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
				 XFS_TRANS_ABORT));
		goto std_return;
	}
	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
	if (new_parent)
		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);

	/*
	 * trans_commit will unlock src_ip, target_ip & decrement
	 * the vnode references.
	 */
	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	error = xfs_finish_rename(tp, &free_list);
	if (wip)
		IRELE(wip);
	return error;

 abort_return:
out_trans_abort:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	if (wip)
		IRELE(wip);
	return error;
}

+1 −1
Original line number Diff line number Diff line
@@ -394,7 +394,7 @@ xfs_vn_rename(
	struct xfs_name	oname;
	struct xfs_name	nname;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	/* if we are exchanging files, we need to set i_mode of both files */