Commit e8fc87f6 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'md-next' of https://github.com/liu-song-6/linux into for-5.4/block

Pull MD changes from Song.

* 'md-next' of https://github.com/liu-song-6/linux:
  raid1: factor out a common routine to handle the completion of sync write
  md: don't call spare_active in md_reap_sync_thread if all member devices can't work
  md: don't set In_sync if array is frozen
  md: allow last device to be forcibly removed from RAID1/RAID10.
  md: Convert to use int_pow()
  md/raid10: end bio when the device faulty
  md/raid1: end bio when the device faulty
  md/raid6: Set R5_ReadError when there is read failure on parity disk
  raid1: use an int as the return value of raise_barrier()
parents 00ec4f30 449808a2
Loading
Loading
Loading
Loading
+41 −8
Original line number Diff line number Diff line
@@ -1826,8 +1826,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
			} else {
				/*
				 * If the array is FROZEN, then the device can't
				 * be in_sync with rest of array.
				 */
				if (!test_bit(MD_RECOVERY_FROZEN,
					      &mddev->recovery))
					set_bit(In_sync, &rdev->flags);
			}
			rdev->raid_disk = role;
			break;
		}
@@ -3664,11 +3671,7 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
		return -EINVAL;
	if (decimals < 0)
		decimals = 0;
	while (decimals < scale) {
		result *= 10;
		decimals ++;
	}
	*res = result;
	*res = result * int_pow(10, scale - decimals);
	return 0;
}

@@ -5182,6 +5185,34 @@ static struct md_sysfs_entry md_consistency_policy =
__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
       consistency_policy_store);

static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%d\n", mddev->fail_last_dev);
}

/*
 * Setting fail_last_dev to true to allow last device to be forcibly removed
 * from RAID1/RAID10.
 */
static ssize_t
fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
	int ret;
	bool value;

	ret = kstrtobool(buf, &value);
	if (ret)
		return ret;

	if (value != mddev->fail_last_dev)
		mddev->fail_last_dev = value;

	return len;
}
static struct md_sysfs_entry md_fail_last_dev =
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
       fail_last_dev_store);

static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
@@ -5198,6 +5229,7 @@ static struct attribute *md_default_attrs[] = {
	&md_array_size.attr,
	&max_corr_read_errors.attr,
	&md_consistency_policy.attr,
	&md_fail_last_dev.attr,
	NULL,
};

@@ -9043,7 +9075,8 @@ void md_reap_sync_thread(struct mddev *mddev)
	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    mddev->degraded != mddev->raid_disks) {
		/* success...*/
		/* activate any spares */
		if (mddev->pers->spare_active(mddev)) {
+1 −0
Original line number Diff line number Diff line
@@ -487,6 +487,7 @@ struct mddev {
	unsigned int			good_device_nr;	/* good device num within cluster raid */

	bool	has_superblocks:1;
	bool	fail_last_dev:1;
};

enum recovery_flags {
+39 −37
Original line number Diff line number Diff line
@@ -447,19 +447,21 @@ static void raid1_end_write_request(struct bio *bio)
		    /* We never try FailFast to WriteMostly devices */
		    !test_bit(WriteMostly, &rdev->flags)) {
			md_error(r1_bio->mddev, rdev);
			if (!test_bit(Faulty, &rdev->flags))
				/* This is the only remaining device,
				 * We need to retry the write without
				 * FailFast
		}

		/*
		 * When the device is faulty, it is not necessary to
		 * handle write error.
		 * For failfast, this is the only remaining device,
		 * We need to retry the write without FailFast.
		 */
		if (!test_bit(Faulty, &rdev->flags))
			set_bit(R1BIO_WriteError, &r1_bio->state);
		else {
			/* Finished with this branch */
			r1_bio->bios[mirror] = NULL;
			to_put = bio;
		}
		} else
			set_bit(R1BIO_WriteError, &r1_bio->state);
	} else {
		/*
		 * Set R1BIO_Uptodate in our master bio, so that we
@@ -872,8 +874,11 @@ static void flush_pending_writes(struct r1conf *conf)
 * backgroup IO calls must call raise_barrier.  Once that returns
 *    there is no normal IO happeing.  It must arrange to call
 *    lower_barrier when the particular background IO completes.
 *
 * If resync/recovery is interrupted, returns -EINTR;
 * Otherwise, returns 0.
 */
static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
static int raise_barrier(struct r1conf *conf, sector_t sector_nr)
{
	int idx = sector_to_idx(sector_nr);

@@ -1612,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)

	/*
	 * If it is not operational, then we have already marked it as dead
	 * else if it is the last working disks, ignore the error, let the
	 * next level up know.
	 * else if it is the last working disks with "fail_last_dev == false",
	 * ignore the error, let the next level up know.
	 * else mark the drive as failed
	 */
	spin_lock_irqsave(&conf->device_lock, flags);
	if (test_bit(In_sync, &rdev->flags)
	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
	    && (conf->raid_disks - mddev->degraded) == 1) {
		/*
		 * Don't fail the drive, act as though we were just a
@@ -1901,6 +1906,22 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
	} while (sectors_to_go > 0);
}

static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate)
{
	if (atomic_dec_and_test(&r1_bio->remaining)) {
		struct mddev *mddev = r1_bio->mddev;
		int s = r1_bio->sectors;

		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
		    test_bit(R1BIO_WriteError, &r1_bio->state))
			reschedule_retry(r1_bio);
		else {
			put_buf(r1_bio);
			md_done_sync(mddev, s, uptodate);
		}
	}
}

static void end_sync_write(struct bio *bio)
{
	int uptodate = !bio->bi_status;
@@ -1927,16 +1948,7 @@ static void end_sync_write(struct bio *bio)
		)
		set_bit(R1BIO_MadeGood, &r1_bio->state);

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		int s = r1_bio->sectors;
		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
		    test_bit(R1BIO_WriteError, &r1_bio->state))
			reschedule_retry(r1_bio);
		else {
			put_buf(r1_bio);
			md_done_sync(mddev, s, uptodate);
		}
	}
	put_sync_write_buf(r1_bio, uptodate);
}

static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2219,17 +2231,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
		generic_make_request(wbio);
	}

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		/* if we're here, all write(s) have completed, so clean up */
		int s = r1_bio->sectors;
		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
		    test_bit(R1BIO_WriteError, &r1_bio->state))
			reschedule_retry(r1_bio);
		else {
			put_buf(r1_bio);
			md_done_sync(mddev, s, 1);
		}
	}
	put_sync_write_buf(r1_bio, 1);
}

/*
+17 −15
Original line number Diff line number Diff line
@@ -465,19 +465,21 @@ static void raid10_end_write_request(struct bio *bio)
			if (test_bit(FailFast, &rdev->flags) &&
			    (bio->bi_opf & MD_FAILFAST)) {
				md_error(rdev->mddev, rdev);
				if (!test_bit(Faulty, &rdev->flags))
					/* This is the only remaining device,
					 * We need to retry the write without
					 * FailFast
			}

			/*
			 * When the device is faulty, it is not necessary to
			 * handle write error.
			 * For failfast, this is the only remaining device,
			 * We need to retry the write without FailFast.
			 */
			if (!test_bit(Faulty, &rdev->flags))
				set_bit(R10BIO_WriteError, &r10_bio->state);
			else {
				r10_bio->devs[slot].bio = NULL;
				to_put = bio;
				dec_rdev = 1;
			}
			} else
				set_bit(R10BIO_WriteError, &r10_bio->state);
		}
	} else {
		/*
@@ -1638,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)

	/*
	 * If it is not operational, then we have already marked it as dead
	 * else if it is the last working disks, ignore the error, let the
	 * next level up know.
	 * else if it is the last working disks with "fail_last_dev == false",
	 * ignore the error, let the next level up know.
	 * else mark the drive as failed
	 */
	spin_lock_irqsave(&conf->device_lock, flags);
	if (test_bit(In_sync, &rdev->flags)
	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
	    && !enough(conf, rdev->raid_disk)) {
		/*
		 * Don't fail the drive, just return an IO error.
+3 −1
Original line number Diff line number Diff line
@@ -2558,7 +2558,9 @@ static void raid5_end_read_request(struct bio * bi)
		    && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
			retry = 1;
		if (retry)
			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
			if (sh->qd_idx >= 0 && sh->pd_idx == i)
				set_bit(R5_ReadError, &sh->dev[i].flags);
			else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
				set_bit(R5_ReadError, &sh->dev[i].flags);
				clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
			} else