Commit 1f08af52 authored by Darrick J. Wong's avatar Darrick J. Wong
Browse files

xfs: implement swapext for rmap filesystems



Implement swapext for filesystems that have reverse mapping.  Back in
the reflink patches, we augmented the bmap code with a 'REMAP' flag
that updates only the bmbt and doesn't touch the allocator and
implemented log redo items for those two operations.  Now we can
rewrite extent swapping as a (looong) series of remap operations.

This is far less efficient than the fork swapping method implemented
in the past, so we only switch this on for rmap.

Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent 39aff5fd
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -21,6 +21,8 @@
/*
 * Components of space reservations.
 */
#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)    \
		(((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)    \
		(((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
#define	XFS_EXTENTADD_SPACE_RES(mp,w)	(XFS_BM_MAXLEVELS(mp,w) - 1)
@@ -28,6 +30,13 @@
	(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
	  XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
	  XFS_EXTENTADD_SPACE_RES(mp,w))
#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
	(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
	  XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
	  XFS_EXTENTADD_SPACE_RES(mp,w) + \
	 ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
	  XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
	  (mp)->m_rmap_maxlevels)
#define	XFS_DAENTER_1B(mp,w)	\
	((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
#define	XFS_DAENTER_DBS(mp,w)	\
+159 −3
Original line number Diff line number Diff line
@@ -1580,6 +1580,13 @@ xfs_swap_extents_check_format(
	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
		return -EINVAL;

	/*
	 * If we have to use the (expensive) rmap swap method, we can
	 * handle any number of extents and any format.
	 */
	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
		return 0;

	/*
	 * if the target inode is in extent form and the temp inode is in btree
	 * form then we will end up with the target inode in the wrong format
@@ -1649,6 +1656,130 @@ xfs_swap_extent_flush(
	return 0;
}

/*
 * Move extents from one file to another, when rmap is enabled.
 */
STATIC int
xfs_swap_extent_rmap(
	struct xfs_trans		**tpp,
	struct xfs_inode		*ip,
	struct xfs_inode		*tip)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_bmbt_irec		uirec;
	struct xfs_bmbt_irec		tirec;
	xfs_fileoff_t			offset_fsb;
	xfs_fileoff_t			end_fsb;
	xfs_filblks_t			count_fsb;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error;
	xfs_filblks_t			ilen;
	xfs_filblks_t			rlen;
	int				nimaps;
	__uint64_t			tip_flags2;

	/*
	 * If the source file has shared blocks, we must flag the donor
	 * file as having shared blocks so that we get the shared-block
	 * rmap functions when we go to fix up the rmaps.  The flags
	 * will be switch for reals later.
	 */
	tip_flags2 = tip->i_d.di_flags2;
	if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)
		tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;

	offset_fsb = 0;
	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);

	while (count_fsb) {
		/* Read extent from the donor file */
		nimaps = 1;
		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
				&nimaps, 0);
		if (error)
			goto out;
		ASSERT(nimaps == 1);
		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);

		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
		ilen = tirec.br_blockcount;

		/* Unmap the old blocks in the source file. */
		while (tirec.br_blockcount) {
			xfs_defer_init(&dfops, &firstfsb);
			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);

			/* Read extent from the source file */
			nimaps = 1;
			error = xfs_bmapi_read(ip, tirec.br_startoff,
					tirec.br_blockcount, &irec,
					&nimaps, 0);
			if (error)
				goto out_defer;
			ASSERT(nimaps == 1);
			ASSERT(tirec.br_startoff == irec.br_startoff);
			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);

			/* Trim the extent. */
			uirec = tirec;
			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
					tirec.br_blockcount,
					irec.br_blockcount);
			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);

			/* Remove the mapping from the donor file. */
			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
					tip, &uirec);
			if (error)
				goto out_defer;

			/* Remove the mapping from the source file. */
			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
					ip, &irec);
			if (error)
				goto out_defer;

			/* Map the donor file's blocks into the source file. */
			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
					ip, &uirec);
			if (error)
				goto out_defer;

			/* Map the source file's blocks into the donor file. */
			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
					tip, &irec);
			if (error)
				goto out_defer;

			error = xfs_defer_finish(tpp, &dfops, ip);
			if (error)
				goto out_defer;

			tirec.br_startoff += rlen;
			if (tirec.br_startblock != HOLESTARTBLOCK &&
			    tirec.br_startblock != DELAYSTARTBLOCK)
				tirec.br_startblock += rlen;
			tirec.br_blockcount -= rlen;
		}

		/* Roll on... */
		count_fsb -= ilen;
		offset_fsb += ilen;
	}

	tip->i_d.di_flags2 = tip_flags2;
	return 0;

out_defer:
	xfs_defer_cancel(&dfops);
out:
	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
	tip->i_d.di_flags2 = tip_flags2;
	return error;
}

/* Swap the extents of two files by swapping data forks. */
STATIC int
xfs_swap_extent_forks(
@@ -1799,6 +1930,7 @@ xfs_swap_extents(
	int			lock_flags;
	struct xfs_ifork	*cowfp;
	__uint64_t		f;
	int			resblks;

	/*
	 * Lock the inodes against other IO, page faults and truncate to
@@ -1829,7 +1961,28 @@ xfs_swap_extents(
	if (error)
		goto out_unlock;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	/*
	 * Extent "swapping" with rmap requires a permanent reservation and
	 * a block reservation because it's really just a remap operation
	 * performed with log redo items!
	 */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		/*
		 * Conceptually this shouldn't affect the shape of either
		 * bmbt, but since we atomically move extents one by one,
		 * we reserve enough space to rebuild both trees.
		 */
		resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
				XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
				XFS_DATA_FORK) +
			  XFS_SWAP_RMAP_SPACE_RES(mp,
				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
				XFS_DATA_FORK);
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				0, 0, &tp);
	} else
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
				0, 0, &tp);
	if (error)
		goto out_unlock;

@@ -1888,6 +2041,9 @@ xfs_swap_extents(
	src_log_flags = XFS_ILOG_CORE;
	target_log_flags = XFS_ILOG_CORE;

	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		error = xfs_swap_extent_rmap(&tp, ip, tip);
	else
		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
				&target_log_flags);
	if (error)
+5 −0
Original line number Diff line number Diff line
@@ -3373,6 +3373,11 @@ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);

/* rmap swapext tracepoints */
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);

#endif /* _TRACE_XFS_H */

#undef TRACE_INCLUDE_PATH