Commit 69b00b5b authored by Guoqing Jiang's avatar Guoqing Jiang Committed by Song Liu
Browse files

md: introduce a new struct for IO serialization



Obviously, IO serialization can degrade performance
significantly. In order to reduce that degradation, an
rb interval tree is added in raid1 to speed up the
collision check.

So, an rb root is needed in md_rdev; abstract all the
serialization-related members into a new struct
(serial_in_rdev) and embed it in md_rdev.

Of course, we need to free the struct once it is no longer
needed, so rdev/rdevs_uninit_serial are added accordingly.
They should be called when destroying the memory pool or
when memory allocation fails.

And we need to call mddev_destroy_serial_pool when
serialize_policy/write-behind is disabled, when the bitmap
is destroyed, or in __md_stop_writes.

Signed-off-by: default avatarGuoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: default avatarSong Liu <songliubraving@fb.com>
parent 4d26d32f
Loading
Loading
Loading
Loading
+4 −8
Original line number Diff line number Diff line
@@ -1789,10 +1789,8 @@ void md_bitmap_destroy(struct mddev *mddev)
		return;

	md_bitmap_wait_behind_writes(mddev);
	if (!mddev->serialize_policy) {
		mempool_destroy(mddev->serial_info_pool);
		mddev->serial_info_pool = NULL;
	}
	if (!mddev->serialize_policy)
		mddev_destroy_serial_pool(mddev, NULL, true);

	mutex_lock(&mddev->bitmap_info.mutex);
	spin_lock(&mddev->lock);
@@ -2478,10 +2476,8 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
	mddev->bitmap_info.max_write_behind = backlog;
	if (!backlog && mddev->serial_info_pool) {
		/* serial_info_pool is not needed if backlog is zero */
		if (!mddev->serialize_policy) {
			mempool_destroy(mddev->serial_info_pool);
			mddev->serial_info_pool = NULL;
		}
		if (!mddev->serialize_policy)
			mddev_destroy_serial_pool(mddev, NULL, false);
	} else if (backlog && !mddev->serial_info_pool) {
		/* serial_info_pool is needed since backlog is not zero */
		struct md_rdev *rdev;
+62 −18
Original line number Diff line number Diff line
@@ -125,25 +125,59 @@ static inline int speed_max(struct mddev *mddev)
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static void rdev_uninit_serial(struct md_rdev *rdev)
{
	if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
		return;

	kfree(rdev->serial);
	rdev->serial = NULL;
}

static void rdevs_uninit_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		rdev_uninit_serial(rdev);
}

static int rdev_init_serial(struct md_rdev *rdev)
{
	spin_lock_init(&rdev->serial_list_lock);
	INIT_LIST_HEAD(&rdev->serial_list);
	init_waitqueue_head(&rdev->serial_io_wait);
	struct serial_in_rdev *serial = NULL;

	if (test_bit(CollisionCheck, &rdev->flags))
		return 0;

	serial = kmalloc(sizeof(struct serial_in_rdev), GFP_KERNEL);
	if (!serial)
		return -ENOMEM;

	spin_lock_init(&serial->serial_lock);
	serial->serial_rb = RB_ROOT_CACHED;
	init_waitqueue_head(&serial->serial_io_wait);
	rdev->serial = serial;
	set_bit(CollisionCheck, &rdev->flags);

	return 1;
	return 0;
}

static void rdevs_init_serial(struct mddev *mddev)
static int rdevs_init_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;
	int ret = 0;

	rdev_for_each(rdev, mddev) {
		if (test_bit(CollisionCheck, &rdev->flags))
			continue;
		rdev_init_serial(rdev);
		ret = rdev_init_serial(rdev);
		if (ret)
			break;
	}

	/* Free all resources if pool is not existed */
	if (ret && !mddev->serial_info_pool)
		rdevs_uninit_serial(mddev);

	return ret;
}

/*
@@ -166,6 +200,8 @@ static int rdev_need_serial(struct md_rdev *rdev)
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			      bool is_suspend)
{
	int ret = 0;

	if (rdev && !rdev_need_serial(rdev) &&
	    !test_bit(CollisionCheck, &rdev->flags))
		return;
@@ -174,9 +210,11 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
		mddev_suspend(mddev);

	if (!rdev)
		rdevs_init_serial(mddev);
		ret = rdevs_init_serial(mddev);
	else
		rdev_init_serial(rdev);
		ret = rdev_init_serial(rdev);
	if (ret)
		goto abort;

	if (mddev->serial_info_pool == NULL) {
		unsigned int noio_flag;
@@ -186,9 +224,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						sizeof(struct serial_info));
		memalloc_noio_restore(noio_flag);
		if (!mddev->serial_info_pool)
		if (!mddev->serial_info_pool) {
			rdevs_uninit_serial(mddev);
			pr_err("can't alloc memory pool for serialization\n");
		}
	}

abort:
	if (!is_suspend)
		mddev_resume(mddev);
}
@@ -199,7 +241,7 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
 * 2. when bitmap is destroyed while policy is not enabled.
 * 3. for disable policy, the pool is destroyed only when no rdev needs it.
 */
static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			       bool is_suspend)
{
	if (rdev && !test_bit(CollisionCheck, &rdev->flags))
@@ -213,8 +255,9 @@ static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			mddev_suspend(mddev);
		rdev_for_each(temp, mddev) {
			if (!rdev) {
				if (!rdev_need_serial(temp))
					clear_bit(CollisionCheck, &temp->flags);
				if (!mddev->serialize_policy ||
				    !rdev_need_serial(temp))
					rdev_uninit_serial(temp);
				else
					num++;
			} else if (temp != rdev &&
@@ -223,7 +266,7 @@ static void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
		}

		if (rdev)
			clear_bit(CollisionCheck, &rdev->flags);
			rdev_uninit_serial(rdev);

		if (num)
			pr_info("The mempool could be used by other devices\n");
@@ -6117,8 +6160,9 @@ static void __md_stop_writes(struct mddev *mddev)
			mddev->in_sync = 1;
		md_update_sb(mddev, 1);
	}
	mempool_destroy(mddev->serial_info_pool);
	mddev->serial_info_pool = NULL;
	/* disable policy to guarantee rdevs free resources for serialization */
	mddev->serialize_policy = 0;
	mddev_destroy_serial_pool(mddev, NULL, true);
}

void md_stop_writes(struct mddev *mddev)
+17 −9
Original line number Diff line number Diff line
@@ -32,6 +32,16 @@
 * be retried.
 */
#define	MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/*
 * The struct embedded in rdev is used to serialize IO.
 */
struct serial_in_rdev {
	struct rb_root_cached serial_rb;
	spinlock_t serial_lock;
	wait_queue_head_t serial_io_wait;
};

/*
 * MD's 'extended' device
 */
@@ -110,12 +120,7 @@ struct md_rdev {
					   * in superblock.
					   */

	/*
	 * The members for check collision of write IOs.
	 */
	struct list_head serial_list;
	spinlock_t serial_list_lock;
	wait_queue_head_t serial_io_wait;
	struct serial_in_rdev *serial;  /* used for raid1 io serialization */

	struct work_struct del_work;	/* used for delayed sysfs removal */

@@ -266,9 +271,10 @@ enum mddev_sb_flags {
#define NR_SERIAL_INFOS		8
/* record current range of serialize IOs */
struct serial_info {
	sector_t lo;
	sector_t hi;
	struct list_head list;
	struct rb_node node;
	sector_t start;		/* start sector of rb node */
	sector_t last;		/* end sector of rb node */
	sector_t _subtree_last; /* highest sector in subtree of rb node */
};

struct mddev {
@@ -740,6 +746,8 @@ extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				     bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				      bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

+33 −28
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
#include <linux/interval_tree_generic.h>

#include <trace/events/block.h>

@@ -50,55 +51,58 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);

#include "raid1-10.c"

#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
		     START, LAST, static inline, raid1_rb);

static int check_and_add_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
	struct serial_info *wi, *temp_wi;
	struct serial_info *si;
	unsigned long flags;
	int ret = 0;
	struct mddev *mddev = rdev->mddev;
	struct serial_in_rdev *serial = rdev->serial;

	wi = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
	si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);

	spin_lock_irqsave(&rdev->serial_list_lock, flags);
	list_for_each_entry(temp_wi, &rdev->serial_list, list) {
	spin_lock_irqsave(&serial->serial_lock, flags);
	/* collision happened */
		if (hi > temp_wi->lo && lo < temp_wi->hi) {
	if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
		ret = -EBUSY;
			break;
		}
	}

	if (!ret) {
		wi->lo = lo;
		wi->hi = hi;
		list_add(&wi->list, &rdev->serial_list);
		si->start = lo;
		si->last = hi;
		raid1_rb_insert(si, &serial->serial_rb);
	} else
		mempool_free(wi, mddev->serial_info_pool);
	spin_unlock_irqrestore(&rdev->serial_list_lock, flags);
		mempool_free(si, mddev->serial_info_pool);
	spin_unlock_irqrestore(&serial->serial_lock, flags);

	return ret;
}

static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
{
	struct serial_info *wi;
	struct serial_info *si;
	unsigned long flags;
	int found = 0;
	struct mddev *mddev = rdev->mddev;

	spin_lock_irqsave(&rdev->serial_list_lock, flags);
	list_for_each_entry(wi, &rdev->serial_list, list)
		if (hi == wi->hi && lo == wi->lo) {
			list_del(&wi->list);
			mempool_free(wi, mddev->serial_info_pool);
	struct serial_in_rdev *serial = rdev->serial;

	spin_lock_irqsave(&serial->serial_lock, flags);
	for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
	     si; si = raid1_rb_iter_next(si, lo, hi)) {
		if (si->start == lo && si->last == hi) {
			raid1_rb_remove(si, &serial->serial_rb);
			mempool_free(si, mddev->serial_info_pool);
			found = 1;
			break;
		}

	}
	if (!found)
		WARN(1, "The write IO is not recorded for serialization\n");
	spin_unlock_irqrestore(&rdev->serial_list_lock, flags);
	wake_up(&rdev->serial_io_wait);
	spin_unlock_irqrestore(&serial->serial_lock, flags);
	wake_up(&serial->serial_io_wait);
}

/*
@@ -1482,6 +1486,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
	for (i = 0; i < disks; i++) {
		struct bio *mbio = NULL;
		struct md_rdev *rdev = conf->mirrors[i].rdev;
		struct serial_in_rdev *serial = rdev->serial;
		if (!r1_bio->bios[i])
			continue;

@@ -1510,13 +1515,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,

		if (r1_bio->behind_master_bio) {
			if (test_bit(CollisionCheck, &rdev->flags))
				wait_event(rdev->serial_io_wait,
				wait_event(serial->serial_io_wait,
					   check_and_add_serial(rdev, lo, hi)
					   == 0);
			if (test_bit(WriteMostly, &rdev->flags))
				atomic_inc(&r1_bio->behind_remaining);
		} else if (mddev->serialize_policy)
			wait_event(rdev->serial_io_wait,
			wait_event(serial->serial_io_wait,
				   check_and_add_serial(rdev, lo, hi) == 0);

		r1_bio->bios[i] = mbio;