Commit f59589fc authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'md-next' of...

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.9/drivers

Pull MD fixes from Song.

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid5: Allow degraded raid6 to do rmw
  md/raid5: Fix Force reconstruct-write io stuck in degraded raid5
  raid5: don't duplicate code for different paths in handle_stripe
  raid5-cache: hold spinlock instead of mutex in r5c_journal_mode_show
  md: print errno in super_written
  md/raid5: remove the redundant setting of STRIPE_HANDLE
  md: register new md sysfs file 'uuid' read-only
  md: fix max sectors calculation for super 1.0
parents a9e8e18a 45a4d8fd
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -426,6 +426,10 @@ All md devices contain:
     The accepted values when writing to this file are ``ppl`` and ``resync``,
     used to enable and disable PPL.

  uuid
     This indicates the UUID of the array in the following format:
     xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx


As component devices are added to an md array, they appear in the ``md``
directory as new directories named::
+42 −5
Original line number Diff line number Diff line
@@ -978,7 +978,8 @@ static void super_written(struct bio *bio)
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: super_written gets error=%d\n", bio->bi_status);
		pr_err("md: %s gets error=%d\n", __func__,
		       blk_status_to_errno(bio->bi_status));
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
@@ -2193,6 +2194,24 @@ retry:
	sb->sb_csum = calc_sb_1_csum(sb);
}

/*
 * Pick how many sectors to reserve for the write-intent bitmap on a
 * minor-version-0 (superblock-at-end) v1.x array, scaled by device size.
 *
 * Thresholds (all values in 512-byte sectors, hence the *2 factors):
 *   device >= ~200Gig -> 128k reserved
 *   device >  ~8Gig   -> 64k reserved
 *   device >= 64k     -> 4k reserved
 *   smaller           -> nothing reserved
 */
static sector_t super_1_choose_bm_space(sector_t dev_size)
{
	/* Too small to spare any bitmap space at all. */
	if (dev_size < 64*2)
		return 0;

	/* if the device is bigger than 8Gig, save 64k for bitmap
	 * usage, if bigger than 200Gig, save 128k
	 */
	if (dev_size - 64*2 >= 200*1024*1024*2)
		return 128*2;
	if (dev_size - 4*2 > 8*1024*1024*2)
		return 64*2;
	return 4*2;
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
@@ -2213,13 +2232,22 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sector_t sb_start, bm_space;
		sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;

		/* 8K is for superblock */
		sb_start = dev_size - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;

		bm_space = super_1_choose_bm_space(dev_size);

		/* Space that can be used to store data needs to exclude
		 * the superblock, bitmap space and bad block space (4K)
		 */
		max_sectors = sb_start - bm_space - 4*2;

		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
@@ -4225,6 +4253,14 @@ out_unlock:
static struct md_sysfs_entry md_raid_disks =
__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);

static ssize_t
uuid_show(struct mddev *mddev, char *page)
{
	/* Render the array UUID in canonical 8-4-4-4-12 form via the
	 * kernel's %pU printk extension; returns the byte count written.
	 */
	ssize_t len = sprintf(page, "%pU\n", mddev->uuid);

	return len;
}
/* Read-only sysfs attribute: /sys/block/mdX/md/uuid */
static struct md_sysfs_entry md_uuid =
__ATTR(uuid, S_IRUGO, uuid_show, NULL);

static ssize_t
chunk_size_show(struct mddev *mddev, char *page)
{
@@ -5481,6 +5517,7 @@ static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_uuid.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
+3 −6
Original line number Diff line number Diff line
@@ -2537,13 +2537,10 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
	struct r5conf *conf;
	int ret;

	ret = mddev_lock(mddev);
	if (ret)
		return ret;

	spin_lock(&mddev->lock);
	conf = mddev->private;
	if (!conf || !conf->log) {
		mddev_unlock(mddev);
		spin_unlock(&mddev->lock);
		return 0;
	}

@@ -2563,7 +2560,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
	default:
		ret = 0;
	}
	mddev_unlock(mddev);
	spin_unlock(&mddev->lock);
	return ret;
}

+22 −18
Original line number Diff line number Diff line
@@ -3557,6 +3557,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
				  &sh->dev[s->failed_num[1]] };
	int i;
	bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);


	if (test_bit(R5_LOCKED, &dev->flags) ||
@@ -3615,17 +3616,27 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
			 * devices must be read.
			 */
			return 1;

		if (s->failed >= 2 &&
		    (fdev[i]->towrite ||
		     s->failed_num[i] == sh->pd_idx ||
		     s->failed_num[i] == sh->qd_idx) &&
		    !test_bit(R5_UPTODATE, &fdev[i]->flags))
			/* In max degraded raid6, If the failed disk is P, Q,
			 * or we want to read the failed disk, we need to do
			 * reconstruct-write.
			 */
			force_rcw = true;
	}

	/* If we are forced to do a reconstruct-write, either because
	 * the current RAID6 implementation only supports that, or
	 * because parity cannot be trusted and we are currently
	 * recovering it, there is extra need to be careful.
	/* If we are forced to do a reconstruct-write, because parity
	 * cannot be trusted and we are currently recovering it, there
	 * is extra need to be careful.
	 * If one of the devices that we would need to read, because
	 * it is not being overwritten (and maybe not written at all)
	 * is missing/faulty, then we need to read everything we can.
	 */
	if (sh->raid_conf->level != 6 &&
	if (!force_rcw &&
	    sh->sector < sh->raid_conf->mddev->recovery_cp)
		/* reconstruct-write isn't being forced */
		return 0;
@@ -3995,10 +4006,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
					set_bit(R5_LOCKED, &dev->flags);
					set_bit(R5_Wantread, &dev->flags);
					s->locked++;
				} else {
				} else
					set_bit(STRIPE_DELAYED, &sh->state);
					set_bit(STRIPE_HANDLE, &sh->state);
				}
			}
		}
	}
@@ -4023,10 +4032,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
					set_bit(R5_Wantread, &dev->flags);
					s->locked++;
					qread++;
				} else {
				} else
					set_bit(STRIPE_DELAYED, &sh->state);
					set_bit(STRIPE_HANDLE, &sh->state);
				}
			}
		}
		if (rcw && conf->mddev->queue)
@@ -4866,7 +4873,7 @@ static void handle_stripe(struct stripe_head *sh)
	 * or to load a block that is being partially written.
	 */
	if (s.to_read || s.non_overwrite
	    || (conf->level == 6 && s.to_write && s.failed)
	    || (s.to_write && s.failed)
	    || (s.syncing && (s.uptodate + s.compute < disks))
	    || s.replacing
	    || s.expanding)
@@ -4970,16 +4977,13 @@ static void handle_stripe(struct stripe_head *sh)
				if (!test_bit(R5_ReWrite, &dev->flags)) {
					set_bit(R5_Wantwrite, &dev->flags);
					set_bit(R5_ReWrite, &dev->flags);
					set_bit(R5_LOCKED, &dev->flags);
					s.locked++;
				} else {
				} else
					/* let's read it back */
					set_bit(R5_Wantread, &dev->flags);
				set_bit(R5_LOCKED, &dev->flags);
				s.locked++;
			}
		}
		}

	/* Finish reconstruct operations initiated by the expansion process */
	if (sh->reconstruct_state == reconstruct_state_result) {