Commit 9454473c authored by Linus Torvalds

Merge tag 'for-linus-20180210' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A few fixes to round off the merge window on the block side:

   - a set of bcache fixes by way of Michael Lyle, from the usual bcache
     suspects.

   - add a simple-to-hook-into function for bpf EIO error injection.

   - fix blk-wbt that mischaracterized flushes as reads. Improve the logic
     so that flushes and writes are accounted as writes, and only reads
     as reads. From me.

   - fix requeue crash in BFQ, from Paolo"

* tag 'for-linus-20180210' of git://git.kernel.dk/linux-block:
  block, bfq: add requeue-request hook
  bcache: fix for data collapse after re-attaching an attached device
  bcache: return attach error when no cache set exist
  bcache: set writeback_rate_update_seconds in range [1, 60] seconds
  bcache: fix for allocator and register thread race
  bcache: set error_limit correctly
  bcache: properly set task state in bch_writeback_thread()
  bcache: fix high CPU occupancy during journal
  bcache: add journal statistic
  block: Add should_fail_bio() for bpf error injection
  blk-wbt: account flush requests correctly
parents cc5cb5af 8525e5ff
block/bfq-iosched.c (+82 −25)
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		}
 
 		/*
-		 * We exploit the bfq_finish_request hook to decrement
-		 * rq_in_driver, but bfq_finish_request will not be
-		 * invoked on this request. So, to avoid unbalance,
-		 * just start this request, without incrementing
-		 * rq_in_driver. As a negative consequence,
-		 * rq_in_driver is deceptively lower than it should be
-		 * while this request is in service. This may cause
-		 * bfq_schedule_dispatch to be invoked uselessly.
+		 * We exploit the bfq_finish_requeue_request hook to
+		 * decrement rq_in_driver, but
+		 * bfq_finish_requeue_request will not be invoked on
+		 * this request. So, to avoid unbalance, just start
+		 * this request, without incrementing rq_in_driver. As
+		 * a negative consequence, rq_in_driver is deceptively
+		 * lower than it should be while this request is in
+		 * service. This may cause bfq_schedule_dispatch to be
+		 * invoked uselessly.
 		 *
 		 * As for implementing an exact solution, the
-		 * bfq_finish_request hook, if defined, is probably
-		 * invoked also on this request. So, by exploiting
-		 * this hook, we could 1) increment rq_in_driver here,
-		 * and 2) decrement it in bfq_finish_request. Such a
-		 * solution would let the value of the counter be
-		 * always accurate, but it would entail using an extra
-		 * interface function. This cost seems higher than the
-		 * benefit, being the frequency of non-elevator-private
+		 * bfq_finish_requeue_request hook, if defined, is
+		 * probably invoked also on this request. So, by
+		 * exploiting this hook, we could 1) increment
+		 * rq_in_driver here, and 2) decrement it in
+		 * bfq_finish_requeue_request. Such a solution would
+		 * let the value of the counter be always accurate,
+		 * but it would entail using an extra interface
+		 * function. This cost seems higher than the benefit,
+		 * being the frequency of non-elevator-private
 		 * requests very low.
 		 */
 		goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 					   unsigned int cmd_flags) {}
 #endif
 
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
+		if (WARN_ON_ONCE(!bfqq)) {
+			/*
+			 * This should never happen. Most likely rq is
+			 * a requeued regular request, being
+			 * re-inserted without being first
+			 * re-prepared. Do a prepare, to avoid
+			 * failure.
+			 */
+			bfq_prepare_request(rq, rq->bio);
+			bfqq = RQ_BFQQ(rq);
+		}
+
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 		bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
 {
 	bfqq->allocated--;
 
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
 {
-	struct bfq_queue *bfqq;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
 
-	if (!rq->elv.icq)
+	/*
+	 * Requeue and finish hooks are invoked in blk-mq without
+	 * checking whether the involved request is actually still
+	 * referenced in the scheduler. To handle this fact, the
+	 * following two checks make this function exit in case of
+	 * spurious invocations, for which there is nothing to do.
+	 *
+	 * First, check whether rq has nothing to do with an elevator.
+	 */
+	if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+		return;
+
+	/*
+	 * rq either is not associated with any icq, or is an already
+	 * requeued request that has not (yet) been re-inserted into
+	 * a bfq_queue.
+	 */
+	if (!rq->elv.icq || !bfqq)
 		return;
 
-	bfqq = RQ_BFQQ(rq);
 	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
 		spin_lock_irqsave(&bfqd->lock, flags);
 
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 
 		spin_unlock_irqrestore(&bfqd->lock, flags);
 	} else {
 		/*
 		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it. And we cannot
+		 * in which case we need to remove it (this should
+		 * never happen in case of requeue). And we cannot
 		 * defer such a check and removal, to avoid
 		 * inconsistencies in the time interval from the end
 		 * of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
 			bfqg_stats_update_io_remove(bfqq_group(bfqq),
 						    rq->cmd_flags);
 		}
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 	}
 
+	/*
+	 * Reset private fields. In case of a requeue, this allows
+	 * this function to correctly do nothing if it is spuriously
+	 * invoked again on this same request (see the check at the
+	 * beginning of the function). Probably, a better general
+	 * design would be to prevent blk-mq from invoking the requeue
+	 * or finish hooks of an elevator, for a request that is not
+	 * referred by that elevator.
+	 *
+	 * Resetting the following fields would break the
+	 * request-insertion logic if rq is re-inserted into a bfq
+	 * internal queue, without a re-preparation. Here we assume
+	 * that re-insertions of requeued requests, without
+	 * re-preparation, can happen only for pass_through or at_head
+	 * requests (which are not re-inserted into bfq internal
+	 * queues).
+	 */
+	rq->elv.priv[0] = NULL;
+	rq->elv.priv[1] = NULL;
 }
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.limit_depth		= bfq_limit_depth,
 		.prepare_request	= bfq_prepare_request,
-		.finish_request		= bfq_finish_request,
+		.requeue_request        = bfq_finish_requeue_request,
+		.finish_request		= bfq_finish_requeue_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
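
A note on why registering the hook fixes the crash: blk-mq only informs an elevator of a requeue if a requeue_request hook is registered, and before this change BFQ registered none, so a requeued request silently kept its scheduler references. A rough sketch of the dispatch site, paraphrased from blk_mq_sched_requeue_request() in block/blk-mq-sched.h of this era (not a verbatim quote):

	static inline void blk_mq_sched_requeue_request(struct request *rq)
	{
		struct request_queue *q = rq->q;
		struct elevator_queue *e = q->elevator;

		/* Without a .requeue_request hook the elevator never hears
		 * about the requeue; BFQ then kept a stale bfq_queue
		 * reference and crashed on the later re-insert. */
		if (e && e->type->ops.mq.requeue_request)
			e->type->ops.mq.requeue_request(rq);
	}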
block/blk-core.c (+10 −1)
@@ -34,6 +34,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 	return false;
 }
 
+static noinline int should_fail_bio(struct bio *bio)
+{
+	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+		return -EIO;
+	return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
 	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
 		goto not_supported;
 
-	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+	if (should_fail_bio(bio))
 		goto end_io;
 
 	if (!bio->bi_partno) {
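
Usage sketch (my own example, not part of this merge): with CONFIG_BPF_KPROBE_OVERRIDE enabled, a BPF kprobe program can now force should_fail_bio() to fail via bpf_override_return(). The file name, program name, and header layout below are assumptions in the style of samples/bpf:

	/* eio_inject_kern.c: hypothetical BPF program, samples/bpf style */
	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"

	#define EIO 5

	/* Attach to should_fail_bio() and make it return -EIO, failing every
	 * bio that passes through generic_make_request_checks(). Needs the
	 * ALLOW_ERROR_INJECTION() annotation added above. */
	SEC("kprobe/should_fail_bio")
	int inject_eio(struct pt_regs *ctx)
	{
		bpf_override_return(ctx, -EIO);
		return 0;
	}

	char _license[] SEC("license") = "GPL";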
block/blk-wbt.c (+9 −1)
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-	return rq_data_dir(rq);
+	const int op = req_op(rq);
+
+	if (op == REQ_OP_READ)
+		return READ;
+	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+		return WRITE;
+
+	/* don't account */
+	return -1;
 }
 
 int wbt_init(struct request_queue *q)
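
For the record, the misaccounting came from rq_data_dir(), which classifies by op_is_write(); REQ_OP_FLUSH has an even op code and so counted as a read. The relevant definitions, as I recall them from the 4.15-era <linux/blk_types.h> and <linux/blkdev.h>:

	enum req_opf {
		REQ_OP_READ	= 0,
		REQ_OP_WRITE	= 1,
		REQ_OP_FLUSH	= 2,	/* even op code */
		/* ... */
	};
	#define op_is_write(op)	((op) & 1)	/* false for REQ_OP_FLUSH */
	#define rq_data_dir(rq)	(op_is_write(req_op(rq)) ? WRITE : READ)
	/* => the old wbt_data_dir() charged flushes to the READ bucket */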
drivers/md/bcache/alloc.c (+3 −1)
@@ -287,8 +287,10 @@ do { \
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
-		if (kthread_should_stop())				\
+		if (kthread_should_stop()) {				\
+			set_current_state(TASK_RUNNING);		\
 			return 0;					\
+		}							\
 									\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
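
The rule this enforces: a kthread must put itself back in TASK_RUNNING before returning, otherwise the exit path can sleep while the task state is still TASK_INTERRUPTIBLE and trigger "do not call blocking ops when !TASK_RUNNING" warnings. A generic sketch of the pattern (not bcache code):

	static int example_thread(void *arg)
	{
		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop()) {
				/* restore the state on every exit path */
				set_current_state(TASK_RUNNING);
				break;
			}
			schedule();
		}
		return 0;
	}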
drivers/md/bcache/bcache.h (+8 −1)
@@ -658,10 +658,15 @@ struct cache_set {
 	atomic_long_t		writeback_keys_done;
 	atomic_long_t		writeback_keys_failed;
 
+	atomic_long_t		reclaim;
+	atomic_long_t		flush_write;
+	atomic_long_t		retry_flush_write;
+
 	enum			{
 		ON_ERROR_UNREGISTER,
 		ON_ERROR_PANIC,
 	}			on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
 	unsigned		error_limit;
 	unsigned		error_decay;
@@ -675,6 +680,8 @@ struct cache_set {
 
 #define BUCKET_HASH_BITS	12
 	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
+
+	DECLARE_HEAP(struct btree *, flush_btree);
 };
 
 struct bbio {
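
DECLARE_HEAP is bcache's open-coded heap helper; quoted from memory of drivers/md/bcache/util.h in this era, so it may differ slightly:

	#define DECLARE_HEAP(type, name)				\
		struct {						\
			size_t size, used;				\
			type *data;					\
		} name

Per the "fix high CPU occupancy during journal" patch in this pull, the journal code uses the flush_btree heap to find the btree nodes holding the oldest journal entries without rescanning the whole btree list on every flush.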
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
 
 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
 
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
 void bch_cached_dev_detach(struct cached_dev *);
 void bch_cached_dev_run(struct cached_dev *);
 void bcache_device_stop(struct bcache_device *);