Commit ac8f7a02 authored by Jens Axboe

Merge branch 'for-5.10/block' into for-5.10/drivers

* for-5.10/block: (140 commits)
  bdi: replace BDI_CAP_NO_{WRITEBACK,ACCT_DIRTY} with a single flag
  bdi: invert BDI_CAP_NO_ACCT_WB
  bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag
  mm: use SWP_SYNCHRONOUS_IO more intelligently
  bdi: remove BDI_CAP_SYNCHRONOUS_IO
  bdi: remove BDI_CAP_CGROUP_WRITEBACK
  block: lift setting the readahead size into the block layer
  md: update the optimal I/O size on reshape
  bdi: initialize ->ra_pages and ->io_pages in bdi_init
  aoe: set an optimal I/O size
  bcache: inherit the optimal I/O size
  drbd: remove dead code in device_to_statistics
  fs: remove the unused SB_I_MULTIROOT flag
  block: mark blkdev_get static
  PM: mm: cleanup swsusp_swap_check
  mm: split swap_type_of
  PM: rewrite is_hibernate_resume_dev to not require an inode
  mm: cleanup claim_swapfile
  ocfs2: cleanup o2hb_region_dev_store
  dasd: cleanup dasd_scan_partitions
  ...
parents 805c6d3c f56753ac
+0 −3
@@ -488,9 +488,6 @@ getgeo: no
swap_slot_free_notify:	no	(see below)
======================= ===================

unlock_native_capacity and revalidate_disk are called only from
check_disk_change().

swap_slot_free_notify is called with swap_lock and sometimes the page lock
held.

+0 −2
@@ -161,8 +161,6 @@ config BLK_WBT_MQ
	depends on BLK_WBT
	help
	Enable writeback throttling by default on multiqueue devices.
	Multiqueue currently doesn't have support for IO scheduling,
	enabling this option is recommended.

config BLK_DEBUG_FS
	bool "Block layer debugging information in debugfs"
+7 −2
@@ -4640,6 +4640,9 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;

	if (!atomic_read(&hctx->elevator_queued))
		return false;

	/*
	 * Avoiding lock: a race on bfqd->busy_queues should cause at
	 * most a call to dispatch for nothing
@@ -5554,6 +5557,7 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		bfq_insert_request(hctx, rq, at_head);
		atomic_inc(&hctx->elevator_queued);
	}
}

@@ -5921,6 +5925,7 @@ static void bfq_finish_requeue_request(struct request *rq)

		bfq_completed_request(bfqq, bfqd);
		bfq_finish_requeue_request_body(bfqq);
		atomic_dec(&rq->mq_hctx->elevator_queued);

		spin_unlock_irqrestore(&bfqd->lock, flags);
	} else {
@@ -6360,8 +6365,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
	struct blk_mq_tags *tags = hctx->sched_tags;
	unsigned int min_shallow;

	min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
	min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags);
	sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow);
}

static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
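
The BFQ hunks above add a per-hctx elevator_queued counter: insert bumps it, request completion drops it, and bfq_has_work() can return false without taking bfqd->lock when it reads zero. A stale read is harmless; at worst it costs one dispatch call that finds nothing. A reduced sketch of that fast-path pattern, with sched_queues_nonempty() standing in as a hypothetical slow path:

	static bool sched_has_work(struct blk_mq_hw_ctx *hctx)
	{
		/* lock-free fast path: nothing was queued to this scheduler */
		if (!atomic_read(&hctx->elevator_queued))
			return false;

		/* hypothetical locked/slower check of the scheduler's queues */
		return sched_queues_nonempty(hctx);
	}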
+26 −6
@@ -119,6 +119,8 @@ static void blkg_async_bio_workfn(struct work_struct *work)
					     async_bio_work);
	struct bio_list bios = BIO_EMPTY_LIST;
	struct bio *bio;
	struct blk_plug plug;
	bool need_plug = false;

	/* as long as there are pending bios, @blkg can't go away */
	spin_lock_bh(&blkg->async_bio_lock);
@@ -126,8 +128,15 @@ static void blkg_async_bio_workfn(struct work_struct *work)
	bio_list_init(&blkg->async_bios);
	spin_unlock_bh(&blkg->async_bio_lock);

	/* start plug only when bio_list contains at least 2 bios */
	if (bios.head && bios.head->bi_next) {
		need_plug = true;
		blk_start_plug(&plug);
	}
	while ((bio = bio_list_pop(&bios)))
		submit_bio(bio);
	if (need_plug)
		blk_finish_plug(&plug);
}

/**
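
The change to blkg_async_bio_workfn() above wraps the drain of the queued async bios in a blk_plug once there are at least two of them, so the block layer can merge and batch the submissions before the plug is flushed. The same pattern in isolation, as a hypothetical helper (my_submit_list is not part of this commit):

	static void my_submit_list(struct bio_list *bios)
	{
		struct blk_plug plug;
		struct bio *bio;

		blk_start_plug(&plug);
		while ((bio = bio_list_pop(bios)))
			submit_bio(bio);
		blk_finish_plug(&plug);	/* merged/batched bios are issued here */
	}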
@@ -1613,16 +1622,24 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
{
	unsigned long pflags;
	bool clamp;
	u64 now = ktime_to_ns(ktime_get());
	u64 exp;
	u64 delay_nsec = 0;
	int tok;

	while (blkg->parent) {
		if (atomic_read(&blkg->use_delay)) {
		int use_delay = atomic_read(&blkg->use_delay);

		if (use_delay) {
			u64 this_delay;

			blkcg_scale_delay(blkg, now);
			delay_nsec = max_t(u64, delay_nsec,
					   atomic64_read(&blkg->delay_nsec));
			this_delay = atomic64_read(&blkg->delay_nsec);
			if (this_delay > delay_nsec) {
				delay_nsec = this_delay;
				clamp = use_delay > 0;
			}
		}
		blkg = blkg->parent;
	}
@@ -1634,9 +1651,12 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
	 * Let's not sleep for all eternity if we've amassed a huge delay.
	 * Swapping or metadata IO can accumulate 10's of seconds worth of
	 * delay, and we want userspace to be able to do _something_ so cap the
	 * delays at 1 second.  If there's 10's of seconds worth of delay then
	 * the tasks will be delayed for 1 second for every syscall.
	 * delays at 0.25s. If there's 10's of seconds worth of delay then the
	 * tasks will be delayed for 0.25 second for every syscall. If
	 * blkcg_set_delay() was used as indicated by negative use_delay, the
	 * caller is responsible for regulating the range.
	 */
	if (clamp)
		delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);

	if (use_memdelay)
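
The blk-cgroup hunk above only applies the 0.25s-per-syscall cap when the delay was accumulated by the use_delay > 0 scaling path; a delay installed via blkcg_set_delay() (negative use_delay) is left for the caller to bound. A hypothetical helper expressing just that decision, not code from this commit:

	static u64 clamp_blkcg_delay(u64 delay_nsec, int use_delay)
	{
		/* io.latency-style scaled delay: cap at 250ms per syscall */
		if (use_delay > 0)
			return min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);

		/* blkcg_set_delay() user: the caller regulates the range itself */
		return delay_nsec;
	}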
+63 −176
@@ -116,8 +116,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->tag = -1;
	rq->internal_tag = -1;
	rq->tag = BLK_MQ_NO_TAG;
	rq->internal_tag = BLK_MQ_NO_TAG;
	rq->start_time_ns = ktime_get_ns();
	rq->part = NULL;
	refcount_set(&rq->ref, 1);
@@ -538,11 +538,10 @@ struct request_queue *blk_alloc_queue(int node_id)
	if (!q->stats)
		goto fail_stats;

	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->io_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->node = node_id;

	atomic_set(&q->nr_active_requests_shared_sbitmap, 0);

	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
		    laptop_mode_timer_fn, 0);
	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
@@ -643,162 +642,6 @@ void blk_put_request(struct request *req)
}
EXPORT_SYMBOL(blk_put_request);

static void blk_account_io_merge_bio(struct request *req)
{
	if (!blk_do_io_stat(req))
		return;

	part_stat_lock();
	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
	part_stat_unlock();
}

bool bio_attempt_back_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_backmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
	return true;
}

bool bio_attempt_front_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_frontmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
	return true;
}

bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL)
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while may have elvpriv data, is not
 * added on the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **same_queue_rq)
{
	struct blk_plug *plug;
	struct request *rq;
	struct list_head *plug_list;

	plug = blk_mq_plug(q, bio);
	if (!plug)
		return false;

	plug_list = &plug->mq_list;

	list_for_each_entry_reverse(rq, plug_list, queuelist) {
		bool merged = false;

		if (rq->q == q && same_queue_rq) {
			/*
			 * Only blk-mq multiple hardware queues case checks the
			 * rq in the same queue, there should be only one such
			 * rq in a queue
			 **/
			*same_queue_rq = rq;
		}

		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			merged = bio_attempt_back_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_FRONT_MERGE:
			merged = bio_attempt_front_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			break;
		}

		if (merged)
			return true;
	}

	return false;
}

static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
	char b[BDEVNAME_SIZE];
@@ -1301,14 +1144,28 @@ EXPORT_SYMBOL(submit_bio);
 *    limits when retrying requests on other queues. Those requests need
 *    to be checked against the new queue limits again during dispatch.
 */
static int blk_cloned_rq_check_limits(struct request_queue *q,
static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
				      struct request *rq)
{
	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));

	if (blk_rq_sectors(rq) > max_sectors) {
		/*
		 * SCSI device does not have a good way to return if
		 * Write Same/Zero is actually supported. If a device rejects
		 * a non-read/write command (discard, write same,etc.) the
		 * low-level device driver will set the relevant queue limit to
		 * 0 to prevent blk-lib from issuing more of the offending
		 * operations. Commands queued prior to the queue limit being
		 * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
		 * errors being propagated to upper layers.
		 */
		if (max_sectors == 0)
			return BLK_STS_NOTSUPP;

		printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
			__func__, blk_rq_sectors(rq),
			blk_queue_get_max_sectors(q, req_op(rq)));
		return -EIO;
			__func__, blk_rq_sectors(rq), max_sectors);
		return BLK_STS_IOERR;
	}

	/*
@@ -1321,10 +1178,10 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
	if (rq->nr_phys_segments > queue_max_segments(q)) {
		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
			__func__, rq->nr_phys_segments, queue_max_segments(q));
		return -EIO;
		return BLK_STS_IOERR;
	}

	return 0;
	return BLK_STS_OK;
}

/**
@@ -1334,8 +1191,11 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
 */
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
	if (blk_cloned_rq_check_limits(q, rq))
		return BLK_STS_IOERR;
	blk_status_t ret;

	ret = blk_cloned_rq_check_limits(q, rq);
	if (ret != BLK_STS_OK)
		return ret;

	if (rq->rq_disk &&
	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
@@ -1461,10 +1321,9 @@ void blk_account_io_start(struct request *rq)
	part_stat_unlock();
}

unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
		unsigned int op)
static unsigned long __part_start_io_acct(struct hd_struct *part,
					  unsigned int sectors, unsigned int op)
{
	struct hd_struct *part = &disk->part0;
	const int sgrp = op_stat_group(op);
	unsigned long now = READ_ONCE(jiffies);

@@ -1477,12 +1336,26 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,

	return now;
}

unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part,
				 struct bio *bio)
{
	*part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector);

	return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio));
}
EXPORT_SYMBOL_GPL(part_start_io_acct);

unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
				 unsigned int op)
{
	return __part_start_io_acct(&disk->part0, sectors, op);
}
EXPORT_SYMBOL(disk_start_io_acct);

void disk_end_io_acct(struct gendisk *disk, unsigned int op,
static void __part_end_io_acct(struct hd_struct *part, unsigned int op,
			       unsigned long start_time)
{
	struct hd_struct *part = &disk->part0;
	const int sgrp = op_stat_group(op);
	unsigned long now = READ_ONCE(jiffies);
	unsigned long duration = now - start_time;
@@ -1493,6 +1366,20 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
	part_stat_local_dec(part, in_flight[op_is_write(op)]);
	part_stat_unlock();
}

void part_end_io_acct(struct hd_struct *part, struct bio *bio,
		      unsigned long start_time)
{
	__part_end_io_acct(part, bio_op(bio), start_time);
	hd_struct_put(part);
}
EXPORT_SYMBOL_GPL(part_end_io_acct);

void disk_end_io_acct(struct gendisk *disk, unsigned int op,
		      unsigned long start_time)
{
	__part_end_io_acct(&disk->part0, op, start_time);
}
EXPORT_SYMBOL(disk_end_io_acct);

/*
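
The last hunks export part_start_io_acct() and part_end_io_acct(), which let a bio-based driver account I/O against the partition a bio actually maps to rather than part0, and which take and drop the hd_struct reference internally. A hedged usage sketch for a 5.10-era bio-based driver; my_submit_bio() and my_do_io() are hypothetical:

	static blk_qc_t my_submit_bio(struct bio *bio)
	{
		struct hd_struct *part;
		unsigned long start;

		/* map the sector to its partition and record the start time */
		start = part_start_io_acct(bio->bi_disk, &part, bio);

		my_do_io(bio);			/* hypothetical: perform the actual I/O */

		/* close the accounting window and drop the hd_struct reference */
		part_end_io_acct(part, bio, start);

		bio_endio(bio);
		return BLK_QC_T_NONE;
	}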