md/raid5: replace sh->lock with an 'active' flag. (c4c1663b) · Commits · 戴 / test

drivers/md/raid5.c

+13 −13

Original line number	Diff line number	Diff line
		@@ -1020,14 +1020,12 @@ ops_run_biodrain(struct stripe_head sh, struct dma_async_tx_descriptor tx)
		if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
		struct bio *wbi;

		spin_lock(&sh->lock);
		spin_lock_irq(&sh->raid_conf->device_lock);
		chosen = dev->towrite;
		dev->towrite = NULL;
		BUG_ON(dev->written);
		wbi = dev->written = chosen;
		spin_unlock_irq(&sh->raid_conf->device_lock);
		spin_unlock(&sh->lock);

		while (wbi && wbi->bi_sector <
		dev->sector + STRIPE_SECTORS) {
		@@ -1322,7 +1320,6 @@ static int grow_one_stripe(raid5_conf_t *conf)
		return 0;

		sh->raid_conf = conf;
		spin_lock_init(&sh->lock);
		#ifdef CONFIG_MULTICORE_RAID456
		init_waitqueue_head(&sh->ops.wait_for_ops);
		#endif
		@@ -1442,7 +1439,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
		break;

		nsh->raid_conf = conf;
		spin_lock_init(&nsh->lock);
		#ifdef CONFIG_MULTICORE_RAID456
		init_waitqueue_head(&nsh->ops.wait_for_ops);
		#endif
		@@ -2148,7 +2144,6 @@ static int add_stripe_bio(struct stripe_head sh, struct bio bi, int dd_idx, in
		(unsigned long long)sh->sector);


		spin_lock(&sh->lock);
		spin_lock_irq(&conf->device_lock);
		if (forwrite) {
		bip = &sh->dev[dd_idx].towrite;
		@@ -2184,7 +2179,6 @@ static int add_stripe_bio(struct stripe_head sh, struct bio bi, int dd_idx, in
		set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
		}
		spin_unlock_irq(&conf->device_lock);
		spin_unlock(&sh->lock);

		pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
		(unsigned long long)(*bip)->bi_sector,
		@@ -2201,7 +2195,6 @@ static int add_stripe_bio(struct stripe_head sh, struct bio bi, int dd_idx, in
		overlap:
		set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
		spin_unlock_irq(&conf->device_lock);
		spin_unlock(&sh->lock);
		return 0;
		}

		@@ -3023,12 +3016,10 @@ static void handle_stripe5(struct stripe_head *sh)
		atomic_read(&sh->count), sh->pd_idx, sh->check_state,
		sh->reconstruct_state);

		spin_lock(&sh->lock);
		if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
		set_bit(STRIPE_SYNCING, &sh->state);
		clear_bit(STRIPE_INSYNC, &sh->state);
		}
		clear_bit(STRIPE_HANDLE, &sh->state);
		clear_bit(STRIPE_DELAYED, &sh->state);

		s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
		@@ -3037,6 +3028,7 @@ static void handle_stripe5(struct stripe_head *sh)

		/* Now to look around and see what can be done */
		rcu_read_lock();
		spin_lock_irq(&conf->device_lock);
		for (i=disks; i--; ) {
		mdk_rdev_t *rdev;

		@@ -3099,6 +3091,7 @@ static void handle_stripe5(struct stripe_head *sh)
		s.failed_num = i;
		}
		}
		spin_unlock_irq(&conf->device_lock);
		rcu_read_unlock();

		if (unlikely(blocked_rdev)) {
		@@ -3275,7 +3268,6 @@ static void handle_stripe5(struct stripe_head *sh)
		handle_stripe_expansion(conf, sh, NULL);

		unlock:
		spin_unlock(&sh->lock);

		/* wait for this device to become unblocked */
		if (unlikely(blocked_rdev))
		@@ -3318,12 +3310,10 @@ static void handle_stripe6(struct stripe_head *sh)
		sh->check_state, sh->reconstruct_state);
		memset(&s, 0, sizeof(s));

		spin_lock(&sh->lock);
		if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
		set_bit(STRIPE_SYNCING, &sh->state);
		clear_bit(STRIPE_INSYNC, &sh->state);
		}
		clear_bit(STRIPE_HANDLE, &sh->state);
		clear_bit(STRIPE_DELAYED, &sh->state);

		s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
		@@ -3332,6 +3322,7 @@ static void handle_stripe6(struct stripe_head *sh)
		/* Now to look around and see what can be done */

		rcu_read_lock();
		spin_lock_irq(&conf->device_lock);
		for (i=disks; i--; ) {
		mdk_rdev_t *rdev;
		dev = &sh->dev[i];
		@@ -3395,6 +3386,7 @@ static void handle_stripe6(struct stripe_head *sh)
		s.failed++;
		}
		}
		spin_unlock_irq(&conf->device_lock);
		rcu_read_unlock();

		if (unlikely(blocked_rdev)) {
		@@ -3580,7 +3572,6 @@ static void handle_stripe6(struct stripe_head *sh)
		handle_stripe_expansion(conf, sh, &r6s);

		unlock:
		spin_unlock(&sh->lock);

		/* wait for this device to become unblocked */
		if (unlikely(blocked_rdev))
		@@ -3608,10 +3599,19 @@ static void handle_stripe6(struct stripe_head *sh)

		static void handle_stripe(struct stripe_head *sh)
		{
		clear_bit(STRIPE_HANDLE, &sh->state);
		if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
		/* already being handled, ensure it gets handled
		* again when current action finishes */
		set_bit(STRIPE_HANDLE, &sh->state);
		return;
		}

		if (sh->raid_conf->level == 6)
		handle_stripe6(sh);
		else
		handle_stripe5(sh);
		clear_bit(STRIPE_ACTIVE, &sh->state);
		}

		static void raid5_activate_delayed(raid5_conf_t *conf)

drivers/md/raid5.h

+16 −19

Original line number	Diff line number	Diff line
		@@ -6,11 +6,11 @@

		/*
		*
		* Each stripe contains one buffer per disc. Each buffer can be in
		* Each stripe contains one buffer per device. Each buffer can be in
		* one of a number of states stored in "flags". Changes between
		* these states happen almost exclusively under a per-stripe
		* spinlock. Some very specific changes can happen in bi_end_io, and
		* these are not protected by the spin lock.
		* these states happen almost exclusively under the protection of the
		* STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and
		* these are not protected by STRIPE_ACTIVE.
		*
		* The flag bits that are used to represent these states are:
		* R5_UPTODATE and R5_LOCKED
		@@ -76,12 +76,10 @@
		* block and the cached buffer are successfully written, any buffer on
		* a written list can be returned with b_end_io.
		*
		* The write list and read list both act as fifos. The read list is
		* protected by the device_lock. The write and written lists are
		* protected by the stripe lock. The device_lock, which can be
		* claimed while the stipe lock is held, is only for list
		* manipulations and will only be held for a very short time. It can
		* be claimed from interrupts.
		* The write list and read list both act as fifos. The read list,
		* write list and written list are protected by the device_lock.
		* The device_lock is only for list manipulations and will only be
		* held for a very short time. It can be claimed from interrupts.
		*
		*
		* Stripes in the stripe cache can be on one of two lists (or on
		@@ -96,7 +94,6 @@
		*
		* The inactive_list, handle_list and hash bucket lists are all protected by the
		* device_lock.
		* - stripes on the inactive_list never have their stripe_lock held.
		* - stripes have a reference counter. If count==0, they are on a list.
		* - If a stripe might need handling, STRIPE_HANDLE is set.
		* - When refcount reaches zero, then if STRIPE_HANDLE it is put on
		@@ -116,10 +113,10 @@
		* attach a request to an active stripe (add_stripe_bh())
		* lockdev attach-buffer unlockdev
		* handle a stripe (handle_stripe())
		* lockstripe clrSTRIPE_HANDLE ...
		* setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ...
		* (lockdev check-buffers unlockdev) ..
		* change-state ..
		* record io/ops needed unlockstripe schedule io/ops
		* record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
		* release an active stripe (release_stripe())
		* lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
		*
		@@ -128,8 +125,7 @@
		* on a cached buffer, and plus one if the stripe is undergoing stripe
		* operations.
		*
		* Stripe operations are performed outside the stripe lock,
		* the stripe operations are:
		* The stripe operations are:
		* -copying data between the stripe cache and user application buffers
		* -computing blocks to save a disk access, or to recover a missing block
		* -updating the parity on a write operation (reconstruct write and
		@@ -159,7 +155,8 @@
		*/

		/*
		* Operations state - intermediate states that are visible outside of sh->lock
		* Operations state - intermediate states that are visible outside of
		* STRIPE_ACTIVE.
		* In general _idle indicates nothing is running, _run indicates a data
		* processing operation is active, and _result means the data processing result
		* is stable and can be acted upon. For simple operations like biofill and
		@@ -209,7 +206,6 @@ struct stripe_head {
		short ddf_layout;/* use DDF ordering to calculate Q */
		unsigned long state; /* state flags */
		atomic_t count; /* nr of active thread/requests */
		spinlock_t lock;
		int bm_seq; /* sequence number for bitmap flushes */
		int disks; /* disks in stripe */
		enum check_states check_state;
		@@ -240,7 +236,7 @@ struct stripe_head {
		};

		/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
		* for handle_stripe. It is only valid under spin_lock(sh->lock);
		* for handle_stripe.
		*/
		struct stripe_head_state {
		int syncing, expanding, expanded;
		@@ -290,6 +286,7 @@ struct r6_state {
		* Stripe state
		*/
		enum {
		STRIPE_ACTIVE,
		STRIPE_HANDLE,
		STRIPE_SYNC_REQUESTED,
		STRIPE_SYNCING,
		@@ -339,7 +336,7 @@ enum {
		* PREREAD_ACTIVE.
		* In stripe_handle, if we find pre-reading is necessary, we do it if
		* PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
		* HANDLE gets cleared if stripe_handle leave nothing locked.
		* HANDLE gets cleared if stripe_handle leaves nothing locked.
		*/

Admin message