Commit 664ffb8a authored by Christoph Hellwig, committed by Darrick J. Wong
Browse files

xfs: move the buffer retry logic to xfs_buf.c



Move the buffer retry state machine logic to xfs_buf.c and call it once
from xfs_ioend instead of duplicating it three times for the three kinds
of buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 23fb5a93
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -177,9 +177,9 @@ xfs_trans_log_inode(

	/*
	 * Always OR in the bits from the ili_last_fields field.  This is to
	 * coordinate with the xfs_iflush() and xfs_iflush_done() routines in
	 * the eventual clearing of the ili_fields bits.  See the big comment in
	 * xfs_iflush() for an explanation of this coordination mechanism.
	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
	 * in the eventual clearing of the ili_fields bits.  See the big comment
	 * in xfs_iflush() for an explanation of this coordination mechanism.
	 */
	iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
	spin_unlock(&iip->ili_lock);
+170 −3
Original line number Diff line number Diff line
@@ -1171,8 +1171,145 @@ xfs_buf_wait_unpin(
}

/*
 *	Buffer Utility Routines
 * Decide if we're going to retry the write after a failure, and prepare
 * the buffer for retrying the write.
 */
static bool
xfs_buf_ioerror_fail_without_retry(
	struct xfs_buf		*bp)
{
	/* Function-local state remembering the most recent alert. */
	static struct xfs_buftarg *prev_target;
	static unsigned long	prev_alert;
	bool			new_target;

	/* Once the filesystem has been shut down a retry is pointless. */
	if (XFS_FORCED_SHUTDOWN(bp->b_mount))
		return true;

	/*
	 * Emit the I/O error alert when the target differs from the one seen
	 * on the previous call, or when more than 5*HZ jiffies have passed
	 * since the last alert.
	 */
	new_target = bp->b_target != prev_target;
	if (new_target || time_after(jiffies, prev_alert + 5 * HZ)) {
		prev_alert = jiffies;
		xfs_buf_ioerror_alert(bp, __this_address);
	}
	prev_target = bp->b_target;

	/* Non-async writes fail straight away; their callers see the error. */
	return !(bp->b_flags & XBF_ASYNC);
}

/*
 * Returns true when the buffer has been prepared for an immediate write
 * resubmission, false when this error has already been seen on a buffer that
 * is stale or flagged as a failed write.
 */
static bool
xfs_buf_ioerror_retry(
	struct xfs_buf		*bp,
	struct xfs_error_cfg	*cfg)
{
	bool	failed_before = bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL);
	bool	same_error = bp->b_last_error == bp->b_error;

	if (failed_before && same_error)
		return false;

	/* Remember this error and flag the buffer as a failed write. */
	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
	bp->b_last_error = bp->b_error;

	/* Without a retry timeout there is no start time to record. */
	if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
		return true;

	/* Record the start time only once per run of failures. */
	if (!bp->b_first_retry_time)
		bp->b_first_retry_time = jiffies;
	return true;
}

/*
 * Account for this latest trip around the retry handler, and decide if
 * we've failed enough times to constitute a permanent failure.
 */
static bool
xfs_buf_ioerror_permanent(
	struct xfs_buf		*bp,
	struct xfs_error_cfg	*cfg)
{
	struct xfs_mount	*mp = bp->b_mount;

	/* The retry counter is only bumped when a retry limit is configured. */
	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER) {
		bp->b_retries++;
		if (bp->b_retries > cfg->max_retries)
			return true;
	}

	/* Permanent once the timeout measured from the first retry has passed. */
	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER) {
		if (time_after(jiffies,
			       cfg->retry_timeout + bp->b_first_retry_time))
			return true;
	}

	/* While unmounting, m_fail_unmount makes any failure permanent. */
	return (mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount;
}

/*
 * On a sync write or shutdown we just want to stale the buffer and let the
 * caller handle the error in bp->b_error appropriately.
 *
 * If the write was asynchronous then no one will be looking for the error.  If
 * this is the first failure of this type, clear the error state and write the
 * buffer out again. This means we always retry an async write failure at least
 * once, but we also need to set the buffer up to behave correctly now for
 * repeated failures.
 *
 * If we get repeated async write failures, then we take action according to the
 * error configuration we have been set up to use.
 *
 * Multi-state return value:
 *
 * XBF_IOEND_FINISH: run callback completions
 * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
 * XBF_IOEND_FAIL: transient error, run failure callback completions and then
 *    release the buffer
 */
enum xfs_buf_ioend_disposition {
	XBF_IOEND_FINISH,	/* no error, or buffer staled: run completions */
	XBF_IOEND_DONE,		/* write resubmitted: run no completions */
	XBF_IOEND_FAIL,		/* transient error: run failure callbacks */
};

/*
 * Classify a completed buffer I/O: nothing went wrong, the write was
 * resubmitted, the failure is transient, or the buffer has been staled.
 */
static enum xfs_buf_ioend_disposition
xfs_buf_ioend_disposition(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_error_cfg	*cfg;

	if (likely(!bp->b_error))
		return XBF_IOEND_FINISH;

	if (!xfs_buf_ioerror_fail_without_retry(bp)) {
		trace_xfs_buf_iodone_async(bp, _RET_IP_);

		cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);

		/* Eligible for a retry: clear the error and write it again. */
		if (xfs_buf_ioerror_retry(bp, cfg)) {
			xfs_buf_ioerror(bp, 0);
			xfs_buf_submit(bp);
			return XBF_IOEND_DONE;
		}

		/* Still a transient error. Caller will schedule retries. */
		if (!xfs_buf_ioerror_permanent(bp, cfg))
			return XBF_IOEND_FAIL;

		/*
		 * Permanent error - trigger a shutdown to indicate that
		 * inconsistency will result, then stale the buffer below.
		 */
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	}

	/* Stale the buffer; the error stays in bp->b_error for the caller. */
	xfs_buf_stale(bp);
	bp->b_flags |= XBF_DONE;
	trace_xfs_buf_error_relse(bp, _RET_IP_);
	return XBF_IOEND_FINISH;
}

static void
xfs_buf_ioend(
@@ -1210,12 +1347,42 @@ xfs_buf_ioend(
			bp->b_flags |= XBF_DONE;
		}

		switch (xfs_buf_ioend_disposition(bp)) {
		case XBF_IOEND_DONE:
			return;
		case XBF_IOEND_FAIL:
			if (bp->b_flags & _XBF_INODES)
				xfs_buf_inode_io_fail(bp);
			else if (bp->b_flags & _XBF_DQUOTS)
				xfs_buf_dquot_io_fail(bp);
			else
				ASSERT(list_empty(&bp->b_li_list));
			xfs_buf_ioerror(bp, 0);
			xfs_buf_relse(bp);
			return;
		default:
			break;
		}

		/* clear the retry state */
		bp->b_last_error = 0;
		bp->b_retries = 0;
		bp->b_first_retry_time = 0;

		/*
		 * Note that for things like remote attribute buffers, there may
		 * not be a buffer log item here, so processing the buffer log
		 * item must remain optional.
		 */
		if (bp->b_log_item)
			xfs_buf_item_done(bp);

		if (bp->b_flags & _XBF_INODES)
			xfs_buf_inode_iodone(bp);
		else if (bp->b_flags & _XBF_DQUOTS)
			xfs_buf_dquot_iodone(bp);
		else
			xfs_buf_iodone(bp);

		xfs_buf_ioend_finish(bp);
	}
}

+2 −258
Original line number Diff line number Diff line
@@ -30,8 +30,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
	return container_of(lip, struct xfs_buf_log_item, bli_item);
}

static void xfs_buf_item_done(struct xfs_buf *bp);

/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
@@ -463,7 +461,7 @@ xfs_buf_item_unpin(
		 */
		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
			xfs_buf_item_done(bp);
			xfs_iflush_done(bp);
			xfs_buf_inode_iodone(bp);
			ASSERT(list_empty(&bp->b_li_list));
		} else {
			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
@@ -956,156 +954,12 @@ xfs_buf_item_relse(
	xfs_buf_item_free(bip);
}

/*
 * Decide if we're going to retry the write after a failure, and prepare
 * the buffer for retrying the write.
 */
static bool
xfs_buf_ioerror_fail_without_retry(
	struct xfs_buf		*bp)
{
	/* Function-local state remembering the most recent alert. */
	static xfs_buftarg_t	*prev_target;
	static ulong		prev_alert;
	bool			new_target;

	/* Once the filesystem has been shut down a retry is pointless. */
	if (XFS_FORCED_SHUTDOWN(bp->b_mount))
		return true;

	/*
	 * Emit the I/O error alert when the target differs from the one seen
	 * on the previous call, or when more than 5*HZ jiffies have passed
	 * since the last alert.
	 */
	new_target = bp->b_target != prev_target;
	if (new_target || time_after(jiffies, prev_alert + 5 * HZ)) {
		prev_alert = jiffies;
		xfs_buf_ioerror_alert(bp, __this_address);
	}
	prev_target = bp->b_target;

	/* Non-async writes fail straight away; their callers see the error. */
	return !(bp->b_flags & XBF_ASYNC);
}

/*
 * Returns true when the buffer has been prepared for an immediate write
 * resubmission, false when this error has already been seen on a buffer that
 * is stale or flagged as a failed write.
 */
static bool
xfs_buf_ioerror_retry(
	struct xfs_buf		*bp,
	struct xfs_error_cfg	*cfg)
{
	bool	failed_before = bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL);
	bool	same_error = bp->b_last_error == bp->b_error;

	if (failed_before && same_error)
		return false;

	/* Remember this error and flag the buffer as a failed write. */
	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
	bp->b_last_error = bp->b_error;

	/* Without a retry timeout there is no start time to record. */
	if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
		return true;

	/* Record the start time only once per run of failures. */
	if (!bp->b_first_retry_time)
		bp->b_first_retry_time = jiffies;
	return true;
}

/*
 * Account for this latest trip around the retry handler, and decide if
 * we've failed enough times to constitute a permanent failure.
 */
static bool
xfs_buf_ioerror_permanent(
	struct xfs_buf		*bp,
	struct xfs_error_cfg	*cfg)
{
	struct xfs_mount	*mp = bp->b_mount;

	/* The retry counter is only bumped when a retry limit is configured. */
	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER) {
		bp->b_retries++;
		if (bp->b_retries > cfg->max_retries)
			return true;
	}

	/* Permanent once the timeout measured from the first retry has passed. */
	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER) {
		if (time_after(jiffies,
			       cfg->retry_timeout + bp->b_first_retry_time))
			return true;
	}

	/* While unmounting, m_fail_unmount makes any failure permanent. */
	return (mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount;
}

/*
 * On a sync write or shutdown we just want to stale the buffer and let the
 * caller handle the error in bp->b_error appropriately.
 *
 * If the write was asynchronous then no one will be looking for the error.  If
 * this is the first failure of this type, clear the error state and write the
 * buffer out again. This means we always retry an async write failure at least
 * once, but we also need to set the buffer up to behave correctly now for
 * repeated failures.
 *
 * If we get repeated async write failures, then we take action according to the
 * error configuration we have been set up to use.
 *
 * Multi-state return value:
 *
 * XBF_IOEND_FINISH: run callback completions
 * XBF_IOEND_DONE: resubmitted immediately, do not run any completions
 * XBF_IOEND_FAIL: transient error, run failure callback completions and then
 *    release the buffer
 */
enum xfs_buf_ioend_disposition {
	XBF_IOEND_FINISH,	/* no error, or buffer staled: run completions */
	XBF_IOEND_DONE,		/* write resubmitted: run no completions */
	XBF_IOEND_FAIL,		/* transient error: run failure callbacks */
};

/*
 * Classify a completed buffer I/O: nothing went wrong, the write was
 * resubmitted, the failure is transient, or the buffer has been staled.
 */
static enum xfs_buf_ioend_disposition
xfs_buf_ioend_disposition(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_error_cfg	*cfg;

	if (likely(!bp->b_error))
		return XBF_IOEND_FINISH;

	if (!xfs_buf_ioerror_fail_without_retry(bp)) {
		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);

		cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);

		/* Eligible for a retry: clear the error and write it again. */
		if (xfs_buf_ioerror_retry(bp, cfg)) {
			xfs_buf_ioerror(bp, 0);
			xfs_buf_submit(bp);
			return XBF_IOEND_DONE;
		}

		/* Still a transient error. Caller will schedule retries. */
		if (!xfs_buf_ioerror_permanent(bp, cfg))
			return XBF_IOEND_FAIL;

		/*
		 * Permanent error - trigger a shutdown to indicate that
		 * inconsistency will result, then stale the buffer below.
		 */
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	}

	/* Stale the buffer; the error stays in bp->b_error for the caller. */
	xfs_buf_stale(bp);
	bp->b_flags |= XBF_DONE;
	trace_xfs_buf_error_relse(bp, _RET_IP_);
	return XBF_IOEND_FINISH;
}

static void
void
xfs_buf_item_done(
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	if (!bip)
		return;

	/*
	 * If we are forcibly shutting down, this may well be off the AIL
	 * already. That's because we simulate the log-committed callbacks to
@@ -1120,113 +974,3 @@ xfs_buf_item_done(
	xfs_buf_item_free(bip);
	xfs_buf_rele(bp);
}

/* Zero the buffer fields that track a run of write failures. */
static inline void
xfs_buf_clear_ioerror_retry_state(
	struct xfs_buf		*bp)
{
	bp->b_first_retry_time = 0;
	bp->b_retries = 0;
	bp->b_last_error = 0;
}

static void
xfs_buf_inode_io_fail(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip;

	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
		set_bit(XFS_LI_FAILED, &lip->li_flags);

	xfs_buf_ioerror(bp, 0);
	xfs_buf_relse(bp);
}

/*
 * Inode buffer iodone callback function.
 */
void
xfs_buf_inode_iodone(
	struct xfs_buf		*bp)
{
	switch (xfs_buf_ioend_disposition(bp)) {
	case XBF_IOEND_DONE:
		return;
	case XBF_IOEND_FAIL:
		xfs_buf_inode_io_fail(bp);
		return;
	default:
		break;
	}

	xfs_buf_clear_ioerror_retry_state(bp);
	xfs_buf_item_done(bp);
	xfs_iflush_done(bp);
	xfs_buf_ioend_finish(bp);
}

static void
xfs_buf_dquot_io_fail(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip;

	spin_lock(&bp->b_mount->m_ail->ail_lock);
	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
		xfs_set_li_failed(lip, bp);
	spin_unlock(&bp->b_mount->m_ail->ail_lock);
	xfs_buf_ioerror(bp, 0);
	xfs_buf_relse(bp);
}

/*
 * Dquot buffer iodone callback function.
 */
void
xfs_buf_dquot_iodone(
	struct xfs_buf		*bp)
{
	switch (xfs_buf_ioend_disposition(bp)) {
	case XBF_IOEND_DONE:
		return;
	case XBF_IOEND_FAIL:
		xfs_buf_dquot_io_fail(bp);
		return;
	default:
		break;
	}

	xfs_buf_clear_ioerror_retry_state(bp);
	/* a newly allocated dquot buffer might have a log item attached */
	xfs_buf_item_done(bp);
	xfs_dquot_done(bp);
	xfs_buf_ioend_finish(bp);
}

/*
 * Dirty buffer iodone callback function.
 *
 * Note that for things like remote attribute buffers, there may not be a buffer
 * log item here, so processing the buffer log item must remain optional.
 */
void
xfs_buf_iodone(
	struct xfs_buf		*bp)
{
	switch (xfs_buf_ioend_disposition(bp)) {
	case XBF_IOEND_DONE:
		return;
	case XBF_IOEND_FAIL:
		ASSERT(list_empty(&bp->b_li_list));
		xfs_buf_ioerror(bp, 0);
		xfs_buf_relse(bp);
		return;
	default:
		break;
	}

	xfs_buf_clear_ioerror_retry_state(bp);
	xfs_buf_item_done(bp);
	xfs_buf_ioend_finish(bp);
}
+12 −0
Original line number Diff line number Diff line
@@ -50,12 +50,24 @@ struct xfs_buf_log_item {
};

int	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void	xfs_buf_item_done(struct xfs_buf *bp);
void	xfs_buf_item_relse(struct xfs_buf *);
bool	xfs_buf_item_put(struct xfs_buf_log_item *);
void	xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool	xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void	xfs_buf_inode_iodone(struct xfs_buf *);
void	xfs_buf_inode_io_fail(struct xfs_buf *bp);
#ifdef CONFIG_XFS_QUOTA
void	xfs_buf_dquot_iodone(struct xfs_buf *);
void	xfs_buf_dquot_io_fail(struct xfs_buf *bp);
#else
/* No-op stand-in used when CONFIG_XFS_QUOTA is not set. */
static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp)
{
}
/* No-op stand-in used when CONFIG_XFS_QUOTA is not set. */
static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp)
{
}
#endif /* CONFIG_XFS_QUOTA */
void	xfs_buf_iodone(struct xfs_buf *);
bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);

+13 −1
Original line number Diff line number Diff line
@@ -1107,7 +1107,7 @@ xfs_qm_dqflush_done(
}

void
xfs_dquot_done(
xfs_buf_dquot_iodone(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip, *n;
@@ -1118,6 +1118,18 @@ xfs_dquot_done(
	}
}

void
xfs_buf_dquot_io_fail(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip;

	spin_lock(&bp->b_mount->m_ail->ail_lock);
	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
		xfs_set_li_failed(lip, bp);
	spin_unlock(&bp->b_mount->m_ail->ail_lock);
}

/* Check incore dquot for errors before we flush. */
static xfs_failaddr_t
xfs_qm_dqflush_check(
Loading