Commit 4dbcdc75 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds
Browse files

[PATCH] md: count corrected read errors per drive



Store this total in superblock (As appropriate), and make it available to
userspace via sysfs.

Signed-off-by: default avatarNeil Brown <neilb@suse.de>
Acked-by: default avatarGreg KH <greg@kroah.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent d9d166c2
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -222,6 +222,17 @@ Each directory contains:
			 of being recoverred to
	This list make grow in future.

      errors
	An approximate count of read errors that have been detected on
	this device but have not caused the device to be evicted from
	the array (either because they were corrected or because they
	happened while the array was read-only).  When using version-1
	metadata, this value persists across restarts of the array.

	This value can be written while assembling an array thus
	providing an ongoing count for arrays with metadata managed by
	userspace.


An active md device will also contain and entry for each active device
in the array.  These are named
+26 −1
Original line number Diff line number Diff line
@@ -1000,6 +1000,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
	}
	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
@@ -1139,6 +1140,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);

	if (mddev->bitmap && mddev->bitmap_file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1592,9 +1595,30 @@ super_show(mdk_rdev_t *rdev, char *page)
}
static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);

static ssize_t
errors_show(mdk_rdev_t *rdev, char *page)
{
	return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
}

static ssize_t
errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	char *e;
	unsigned long n = simple_strtoul(buf, &e, 10);
	if (*buf && (*e == 0 || *e == '\n')) {
		atomic_set(&rdev->corrected_errors, n);
		return len;
	}
	return -EINVAL;
}
static struct rdev_sysfs_entry rdev_errors =
__ATTR(errors, 0644, errors_show, errors_store);

static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_super.attr,
	&rdev_errors.attr,
	NULL,
};
static ssize_t
@@ -1674,6 +1698,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
	rdev->data_offset = 0;
	atomic_set(&rdev->nr_pending, 0);
	atomic_set(&rdev->read_errors, 0);
	atomic_set(&rdev->corrected_errors, 0);

	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
	if (!size) {
@@ -4729,7 +4754,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
	int num = simple_strtoul(val, &e, 10);
	if (*val && (*e == '\0' || *e == '\n')) {
		start_readonly = num;
		return 0;;
		return 0;
	}
	return -EINVAL;
}
+2 −0
Original line number Diff line number Diff line
@@ -1265,6 +1265,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
						continue;
					rdev = conf->mirrors[d].rdev;
					atomic_add(s, &rdev->corrected_errors);
					if (sync_page_io(rdev->bdev,
							 sect + rdev->data_offset,
							 s<<9,
@@ -1463,6 +1464,7 @@ static void raid1d(mddev_t *mddev)
							d = conf->raid_disks;
						d--;
						rdev = conf->mirrors[d].rdev;
						atomic_add(s, &rdev->corrected_errors);
						if (rdev &&
						    test_bit(In_sync, &rdev->flags)) {
							if (sync_page_io(rdev->bdev,
+8 −3
Original line number Diff line number Diff line
@@ -1122,9 +1122,13 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)

	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		set_bit(R10BIO_Uptodate, &r10_bio->state);
	else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
	else {
		atomic_add(r10_bio->sectors,
			   &conf->mirrors[d].rdev->corrected_errors);
		if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
			md_error(r10_bio->mddev,
				 conf->mirrors[d].rdev);
	}

	/* for reconstruct, we always reschedule after a read.
	 * for resync, only after all reads
@@ -1430,6 +1434,7 @@ static void raid10d(mddev_t *mddev)
						sl--;
						d = r10_bio->devs[sl].devnum;
						rdev = conf->mirrors[d].rdev;
						atomic_add(s, &rdev->corrected_errors);
						if (rdev &&
						    test_bit(In_sync, &rdev->flags)) {
							if (sync_page_io(rdev->bdev,
+3 −0
Original line number Diff line number Diff line
@@ -1400,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh)
			bi->bi_io_vec[0].bv_offset = 0;
			bi->bi_size = STRIPE_SIZE;
			bi->bi_next = NULL;
			if (rw == WRITE &&
			    test_bit(R5_ReWrite, &sh->dev[i].flags))
				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
			generic_make_request(bi);
		} else {
			if (rw == 1)
Loading