Merge branch 'xfs-torn-log-fixes-4.5' into for-next (9deed095) · Commits · 戴 / test

fs/xfs/xfs_log_recover.c

+168 −103

Original line number	Diff line number	Diff line
		@@ -1109,27 +1109,10 @@ xlog_verify_head(
		bool tmp_wrapped;

		/*
		* Search backwards through the log looking for the log record header
		* block. This wraps all the way back around to the head so something is
		* seriously wrong if we can't find it.
		*/
		found = xlog_rseek_logrec_hdr(log, head_blk, head_blk, 1, bp, rhead_blk,
		rhead, wrapped);
		if (found < 0)
		return found;
		if (!found) {
		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
		return -EIO;
		}

		tail_blk = BLOCK_LSN(be64_to_cpu((rhead)->h_tail_lsn));

		/*
		* Now that we have a tail block, check the head of the log for torn
		* writes. Search again until we hit the tail or the maximum number of
		* log record I/Os that could have been in flight at one time. Use a
		* temporary buffer so we don't trash the rhead/bp pointer from the
		* call above.
		* Check the head of the log for torn writes. Search backwards from the
		* head until we hit the tail or the maximum number of log record I/Os
		* that could have been in flight at one time. Use a temporary buffer so
		* we don't trash the rhead/bp pointers from the caller.
		*/
		tmp_bp = xlog_get_bp(log, 1);
		if (!tmp_bp)
		@@ -1215,6 +1198,115 @@ xlog_verify_head(
		return error;
		}

		/*
		 * Decide whether the log record described by @rhead is an unmount record
		 * sitting at the very head of the log, i.e. whether the log is clean.
		 *
		 * @log:       log being examined; its tail/last-sync LSNs are rewritten when
		 *             an unmount record is found
		 * @head_blk:  current head block (only read here)
		 * @tail_blk:  updated to the block following the unmount record when the
		 *             log turns out to be clean
		 * @rhead:     record header of the last record in the log
		 * @rhead_blk: block number at which @rhead lives
		 * @bp:        buffer used to read the first data block of the record
		 * @clean:     set to true only if the unmount flag is found, false otherwise
		 *
		 * Returns 0 on success (whether or not the log is clean) or the error
		 * reported by xlog_bread().
		 */
		static int
		xlog_check_unmount_rec(
			struct xlog		*log,
			xfs_daddr_t		*head_blk,
			xfs_daddr_t		*tail_blk,
			struct xlog_rec_header	*rhead,
			xfs_daddr_t		rhead_blk,
			struct xfs_buf		*bp,
			bool			*clean)
		{
			struct xlog_op_header	*ophdr;
			xfs_daddr_t		data_blk;
			xfs_daddr_t		next_blk;
			int			hblks = 1;
			int			error;
			char			*offset;

			*clean = false;

			/*
			 * Work out how many blocks the record header occupies. Only a
			 * version 2 log whose record size exceeds XLOG_HEADER_CYCLE_SIZE
			 * uses more than one; everything else keeps the default of 1.
			 */
			if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
				int	h_size = be32_to_cpu(rhead->h_size);
				int	h_version = be32_to_cpu(rhead->h_version);

				if ((h_version & XLOG_VERSION_2) &&
				    h_size > XLOG_HEADER_CYCLE_SIZE) {
					/* round up to a whole number of header blocks */
					hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
					if (h_size % XLOG_HEADER_CYCLE_SIZE)
						hblks++;
				}
			}

			/*
			 * Block immediately following this record, wrapped to the size of
			 * the physical log since the record may end at the last block.
			 */
			next_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
			next_blk = do_mod(next_blk, log->l_logBBsize);

			/*
			 * Only a record that ends exactly at the head and carries a single
			 * log operation is a candidate for an unmount record.
			 */
			if (*head_blk != next_blk)
				return 0;
			if (be32_to_cpu(rhead->h_num_logops) != 1)
				return 0;

			/* Read the first data block of the record to inspect its op header. */
			data_blk = rhead_blk + hblks;
			data_blk = do_mod(data_blk, log->l_logBBsize);
			error = xlog_bread(log, data_blk, 1, bp, &offset);
			if (error)
				return error;

			ophdr = (struct xlog_op_header *)offset;
			if (!(ophdr->oh_flags & XLOG_UNMOUNT_TRANS))
				return 0;

			/*
			 * Clean unmount: point the tail and last sync LSNs just past the
			 * unmount record so that newly written log records direct recovery
			 * to start after it.
			 */
			xlog_assign_atomic_lsn(&log->l_tail_lsn,
					       log->l_curr_cycle, next_blk);
			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
					       log->l_curr_cycle, next_blk);
			*tail_blk = next_blk;
			*clean = true;

			return 0;
		}

		/*
		 * Reset the in-core log state from the record header at the head of the log.
		 *
		 * @log:        log whose in-core state is rewritten
		 * @head_blk:   head block; becomes the current block
		 * @rhead:      header of the last good log record; supplies the cycle
		 *              number and the tail/last-sync LSNs
		 * @rhead_blk:  block number of @rhead; becomes the previous block
		 * @bump_cycle: when true, the current cycle is advanced by one past the
		 *              cycle stored in @rhead
		 *
		 * The caller asks for the cycle bump when the next write starts a new cycle
		 * rather than continuing the cycle of the last good record, i.e. that
		 * record was complete and ended exactly on the end boundary of the
		 * physical log.
		 */
		static void
		xlog_set_state(
			struct xlog		*log,
			xfs_daddr_t		head_blk,
			struct xlog_rec_header	*rhead,
			xfs_daddr_t		rhead_blk,
			bool			bump_cycle)
		{
			/* Tail and last-sync LSNs are taken verbatim from the header. */
			atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
			atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));

			/* Block positions: the header block and the head itself. */
			log->l_prev_block = rhead_blk;
			log->l_curr_block = (int)head_blk;

			/* Current cycle, optionally advanced into the next one. */
			log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
			if (bump_cycle)
				log->l_curr_cycle += 1;

			/* Both grant heads start at the current cycle/block position. */
			xlog_assign_grant_head(&log->l_reserve_head.grant,
					       log->l_curr_cycle,
					       BBTOB(log->l_curr_block));
			xlog_assign_grant_head(&log->l_write_head.grant,
					       log->l_curr_cycle,
					       BBTOB(log->l_curr_block));
		}

		/*
		* Find the sync block number or the tail of the log.
		*
		@@ -1238,22 +1330,20 @@ xlog_find_tail(
		xfs_daddr_t *tail_blk)
		{
		xlog_rec_header_t *rhead;
		xlog_op_header_t *op_head;
		char *offset = NULL;
		xfs_buf_t *bp;
		int error;
		xfs_daddr_t umount_data_blk;
		xfs_daddr_t after_umount_blk;
		xfs_daddr_t rhead_blk;
		xfs_lsn_t tail_lsn;
		int hblks;
		bool wrapped = false;
		bool clean = false;

		/*
		* Find previous log record
		*/
		if ((error = xlog_find_head(log, head_blk)))
		return error;
		ASSERT(*head_blk < INT_MAX);

		bp = xlog_get_bp(log, 1);
		if (!bp)
		@@ -1271,98 +1361,73 @@ xlog_find_tail(
		}

		/*
		* Trim the head block back to skip over torn records. We can have
		* multiple log I/Os in flight at any time, so we assume CRC failures
		* back through the previous several records are torn writes and skip
		* them.
		* Search backwards through the log looking for the log record header
		* block. This wraps all the way back around to the head so something is
		* seriously wrong if we can't find it.
		*/
		ASSERT(*head_blk < INT_MAX);
		error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
		&rhead, &wrapped);
		if (error)
		goto done;
		error = xlog_rseek_logrec_hdr(log, head_blk, head_blk, 1, bp,
		&rhead_blk, &rhead, &wrapped);
		if (error < 0)
		return error;
		if (!error) {
		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
		return -EIO;
		}
		*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));

		/*
		* Reset log values according to the state of the log when we
		* crashed. In the case where head_blk == 0, we bump curr_cycle
		* one because the next write starts a new cycle rather than
		* continuing the cycle of the last good log record. At this
		* point we have guaranteed that all partial log records have been
		* accounted for. Therefore, we know that the last good log record
		* written was complete and ended exactly on the end boundary
		* of the physical log.
		* Set the log state based on the current head record.
		*/
		log->l_prev_block = rhead_blk;
		log->l_curr_block = (int)*head_blk;
		log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
		if (wrapped)
		log->l_curr_cycle++;
		atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
		atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
		xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
		BBTOB(log->l_curr_block));
		xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
		BBTOB(log->l_curr_block));
		xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
		tail_lsn = atomic64_read(&log->l_tail_lsn);

		/*
		* Look for unmount record. If we find it, then we know there
		* was a clean unmount. Since 'i' could be the last block in
		* the physical log, we convert to a log block before comparing
		* to the head_blk.
		*
		* Save the current tail lsn to use to pass to
		* xlog_clear_stale_blocks() below. We won't want to clear the
		* unmount record if there is one, so we pass the lsn of the
		* unmount record rather than the block after it.
		* Look for an unmount record at the head of the log. This sets the log
		* state to determine whether recovery is necessary.
		*/
		if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		int h_size = be32_to_cpu(rhead->h_size);
		int h_version = be32_to_cpu(rhead->h_version);

		if ((h_version & XLOG_VERSION_2) &&
		(h_size > XLOG_HEADER_CYCLE_SIZE)) {
		hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
		hblks++;
		} else {
		hblks = 1;
		}
		} else {
		hblks = 1;
		}
		after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
		after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
		tail_lsn = atomic64_read(&log->l_tail_lsn);
		if (*head_blk == after_umount_blk &&
		be32_to_cpu(rhead->h_num_logops) == 1) {
		umount_data_blk = rhead_blk + hblks;
		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
		error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
		rhead_blk, bp, &clean);
		if (error)
		goto done;

		op_head = (xlog_op_header_t *)offset;
		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
		/*
		* Set tail and last sync so that newly written
		* log records will point recovery to after the
		* current unmount record.
		* Verify the log head if the log is not clean (e.g., we have anything
		* but an unmount record at the head). This uses CRC verification to
		* detect and trim torn writes. If discovered, CRC failures are
		* considered torn writes and the log head is trimmed accordingly.
		*
		* Note that we can only run CRC verification when the log is dirty
		* because there's no guarantee that the log data behind an unmount
		* record is compatible with the current architecture.
		*/
		xlog_assign_atomic_lsn(&log->l_tail_lsn,
		log->l_curr_cycle, after_umount_blk);
		xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
		log->l_curr_cycle, after_umount_blk);
		*tail_blk = after_umount_blk;
		if (!clean) {
		xfs_daddr_t orig_head = *head_blk;

		error = xlog_verify_head(log, head_blk, tail_blk, bp,
		&rhead_blk, &rhead, &wrapped);
		if (error)
		goto done;

		/* update in-core state again if the head changed */
		if (*head_blk != orig_head) {
		xlog_set_state(log, *head_blk, rhead, rhead_blk,
		wrapped);
		tail_lsn = atomic64_read(&log->l_tail_lsn);
		error = xlog_check_unmount_rec(log, head_blk, tail_blk,
		rhead, rhead_blk, bp,
		&clean);
		if (error)
		goto done;
		}
		}

		/*
		* Note that the unmount was clean. If the unmount
		* was not clean, we need to know this to rebuild the
		* superblock counters from the perag headers if we
		* have a filesystem using non-persistent counters.
		* Note that the unmount was clean. If the unmount was not clean, we
		* need to know this to rebuild the superblock counters from the perag
		* headers if we have a filesystem using non-persistent counters.
		*/
		if (clean)
		log->l_mp->m_flags \|= XFS_MOUNT_WAS_CLEAN;
		}
		}

		/*
		* Make sure that there are no blocks in front of the head

Admin message