Commit 0a85ed6e authored by Linus Torvalds
Browse files

Merge tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - a small series fixing a use-after-free of bdi name (Christoph,Yufen)

 - NVMe fix for a regression with the smaller CQ update (Alexey)

 - NVMe fix for a hang at namespace scanning error recovery (Sagi)

 - fix race with blk-iocost iocg->abs_vdebt updates (Tejun)

* tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block:
  nvme: fix possible hang when ns scanning fails during error recovery
  nvme-pci: fix "slimmer CQ head update"
  bdi: add a ->dev_name field to struct backing_dev_info
  bdi: use bdi_dev_name() to get device name
  bdi: move bdi_dev_name out of line
  vboxsf: don't use the source name in the bdi name
  iocost: protect iocg->abs_vdebt with iocg->waitq.lock
parents e99332e7 59c7c3ca
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -123,6 +123,7 @@
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>
#include <linux/backing-dev.h>

#include "blk.h"
#include "blk-mq.h"
@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
	ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
	switch (ioprio_class) {
	default:
		dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
			"bfq: bad prio class %d\n", ioprio_class);
		pr_err("bdi %s: bfq: bad prio class %d\n",
				bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
				ioprio_class);
		/* fall through */
	case IOPRIO_CLASS_NONE:
		/*
+1 −1
Original line number Diff line number Diff line
@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info->dev)
		return dev_name(blkg->q->backing_dev_info->dev);
		return bdi_dev_name(blkg->q->backing_dev_info);
	return NULL;
}

+71 −46
Original line number Diff line number Diff line
@@ -466,7 +466,7 @@ struct ioc_gq {
	 */
	atomic64_t			vtime;
	atomic64_t			done_vtime;
	atomic64_t			abs_vdebt;
	u64				abs_vdebt;
	u64				last_vtime;

	/*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
	struct iocg_wake_ctx ctx = { .iocg = iocg };
	u64 margin_ns = (u64)(ioc->period_us *
			      WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
	u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
	u64 vdebt, vshortage, expires, oexpires;
	s64 vbudget;
	u32 hw_inuse;

@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
	vbudget = now->vnow - atomic64_read(&iocg->vtime);

	/* pay off debt */
	abs_vdebt = atomic64_read(&iocg->abs_vdebt);
	vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
	vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
	if (vdebt && vbudget > 0) {
		u64 delta = min_t(u64, vbudget, vdebt);
		u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
				    abs_vdebt);
				    iocg->abs_vdebt);

		atomic64_add(delta, &iocg->vtime);
		atomic64_add(delta, &iocg->done_vtime);
		atomic64_sub(abs_delta, &iocg->abs_vdebt);
		if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
			atomic64_set(&iocg->abs_vdebt, 0);
		iocg->abs_vdebt -= abs_delta;
	}

	/*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
	u64 expires, oexpires;
	u32 hw_inuse;

	lockdep_assert_held(&iocg->waitq.lock);

	/* debt-adjust vtime */
	current_hweight(iocg, NULL, &hw_inuse);
	vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
	vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);

	/* clear or maintain depending on the overage */
	if (time_before_eq64(vtime, now->vnow)) {
	/*
	 * Clear or maintain depending on the overage. Non-zero vdebt is what
	 * guarantees that @iocg is online and future iocg_kick_delay() will
	 * clear use_delay. Don't leave it on when there's no vdebt.
	 */
	if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
		blkcg_clear_delay(blkg);
		return false;
	}
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
{
	struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
	struct ioc_now now;
	unsigned long flags;

	spin_lock_irqsave(&iocg->waitq.lock, flags);
	ioc_now(iocg->ioc, &now);
	iocg_kick_delay(iocg, &now, 0);
	spin_unlock_irqrestore(&iocg->waitq.lock, flags);

	return HRTIMER_NORESTART;
}
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
	 * should have woken up in the last period and expire idle iocgs.
	 */
	list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
		if (!waitqueue_active(&iocg->waitq) &&
		    !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
		if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
		    !iocg_is_idle(iocg))
			continue;

		spin_lock(&iocg->waitq.lock);

		if (waitqueue_active(&iocg->waitq) ||
		    atomic64_read(&iocg->abs_vdebt)) {
		if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
			/* might be oversleeping vtime / hweight changes, kick */
			iocg_kick_waitq(iocg, &now);
			iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
	 * tests are racy but the races aren't systemic - we only miss once
	 * in a while which is fine.
	 */
	if (!waitqueue_active(&iocg->waitq) &&
	    !atomic64_read(&iocg->abs_vdebt) &&
	if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
	    time_before_eq64(vtime + cost, now.vnow)) {
		iocg_commit_bio(iocg, bio, cost);
		return;
	}

	/*
	 * We're over budget.  If @bio has to be issued regardless,
	 * remember the abs_cost instead of advancing vtime.
	 * iocg_kick_waitq() will pay off the debt before waking more IOs.
	 * We activated above but w/o any synchronization. Deactivation is
	 * synchronized with waitq.lock and we won't get deactivated as long
	 * as we're waiting or has debt, so we're good if we're activated
	 * here. In the unlikely case that we aren't, just issue the IO.
	 */
	spin_lock_irq(&iocg->waitq.lock);

	if (unlikely(list_empty(&iocg->active_list))) {
		spin_unlock_irq(&iocg->waitq.lock);
		iocg_commit_bio(iocg, bio, cost);
		return;
	}

	/*
	 * We're over budget. If @bio has to be issued regardless, remember
	 * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
	 * off the debt before waking more IOs.
	 *
	 * This way, the debt is continuously paid off each period with the
	 * actual budget available to the cgroup.  If we just wound vtime,
	 * we would incorrectly use the current hw_inuse for the entire
	 * amount which, for example, can lead to the cgroup staying
	 * blocked for a long time even with substantially raised hw_inuse.
	 * actual budget available to the cgroup. If we just wound vtime, we
	 * would incorrectly use the current hw_inuse for the entire amount
	 * which, for example, can lead to the cgroup staying blocked for a
	 * long time even with substantially raised hw_inuse.
	 *
	 * An iocg with vdebt should stay online so that the timer can keep
	 * deducting its vdebt and [de]activate use_delay mechanism
	 * accordingly. We don't want to race against the timer trying to
	 * clear them and leave @iocg inactive w/ dangling use_delay heavily
	 * penalizing the cgroup and its descendants.
	 */
	if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
		atomic64_add(abs_cost, &iocg->abs_vdebt);
		iocg->abs_vdebt += abs_cost;
		if (iocg_kick_delay(iocg, &now, cost))
			blkcg_schedule_throttle(rqos->q,
					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		spin_unlock_irq(&iocg->waitq.lock);
		return;
	}

@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
	 * All waiters are on iocg->waitq and the wait states are
	 * synchronized using waitq.lock.
	 */
	spin_lock_irq(&iocg->waitq.lock);

	/*
	 * We activated above but w/o any synchronization.  Deactivation is
	 * synchronized with waitq.lock and we won't get deactivated as
	 * long as we're waiting, so we're good if we're activated here.
	 * In the unlikely case that we are deactivated, just issue the IO.
	 */
	if (unlikely(list_empty(&iocg->active_list))) {
		spin_unlock_irq(&iocg->waitq.lock);
		iocg_commit_bio(iocg, bio, cost);
		return;
	}

	init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
	wait.wait.private = current;
	wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
	struct ioc_now now;
	u32 hw_inuse;
	u64 abs_cost, cost;
	unsigned long flags;

	/* bypass if disabled or for root cgroup */
	if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
		iocg->cursor = bio_end;

	/*
	 * Charge if there's enough vtime budget and the existing request
	 * has cost assigned.  Otherwise, account it as debt.  See debt
	 * handling in ioc_rqos_throttle() for details.
	 * Charge if there's enough vtime budget and the existing request has
	 * cost assigned.
	 */
	if (rq->bio && rq->bio->bi_iocost_cost &&
	    time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
	    time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
		iocg_commit_bio(iocg, bio, cost);
	else
		atomic64_add(abs_cost, &iocg->abs_vdebt);
		return;
	}

	/*
	 * Otherwise, account it as debt if @iocg is online, which it should
	 * be for the vast majority of cases. See debt handling in
	 * ioc_rqos_throttle() for details.
	 */
	spin_lock_irqsave(&iocg->waitq.lock, flags);
	if (likely(!list_empty(&iocg->active_list))) {
		iocg->abs_vdebt += abs_cost;
		iocg_kick_delay(iocg, &now, cost);
	} else {
		iocg_commit_bio(iocg, bio, cost);
	}
	spin_unlock_irqrestore(&iocg->waitq.lock, flags);
}

static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
	iocg->ioc = ioc;
	atomic64_set(&iocg->vtime, now.vnow);
	atomic64_set(&iocg->done_vtime, now.vnow);
	atomic64_set(&iocg->abs_vdebt, 0);
	atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
	INIT_LIST_HEAD(&iocg->active_list);
	iocg->hweight_active = HWEIGHT_WHOLE;
+1 −1
Original line number Diff line number Diff line
@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
		  * Don't treat an error as fatal, as we potentially already
		  * have a NGUID or EUI-64.
		  */
		if (status > 0)
		if (status > 0 && !(status & NVME_SC_DNR))
			status = 0;
		goto free_data;
	}
+5 −1
Original line number Diff line number Diff line
@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)

static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
	if (++nvmeq->cq_head == nvmeq->q_depth) {
	u16 tmp = nvmeq->cq_head + 1;

	if (tmp == nvmeq->q_depth) {
		nvmeq->cq_head = 0;
		nvmeq->cq_phase ^= 1;
	} else {
		nvmeq->cq_head = tmp;
	}
}

Loading