Commit 8075fc3b authored by Linus Torvalds
Browse files

Merge tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A bit larger than usual this week, mostly due to the NVMe fixes
  arriving late for -rc3 and hence didn't make last week's pull request.

   - NVMe:
        - instance leak and io boundary fixes from Keith
        - fc locking fix from Christophe
        - various tcp/rdma reset during traffic fixes from Sagi
        - pci use-after-free fix from Tong
        - tcp target null deref fix from Ziye

   - Locking fix for partition removal (Christoph)

   - Ensure bdi->io_pages is always set (me)

   - Fixup for hd struct reference (Ming)

   - Fix for zero length bvecs (Ming)

   - Two small blk-iocost fixes (Tejun)"

* tag 'block-5.9-2020-09-04' of git://git.kernel.dk/linux-block:
  block: allow for_each_bvec to support zero len bvec
  blk-stat: make q->stats->lock irqsafe
  blk-iocost: ioc_pd_free() shouldn't assume irq disabled
  block: fix locking in bdev_del_partition
  block: release disk reference in hd_struct_free_work
  block: ensure bdi->io_pages is always initialized
  nvme-pci: cancel nvme device request before disabling
  nvme: only use power of two io boundaries
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  nvme: Fix NULL dereference for pci nvme controllers
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
parents d849ca48 7e249690
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -539,6 +539,7 @@ struct request_queue *blk_alloc_queue(int node_id)
		goto fail_stats;

	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->io_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->node = node_id;

+3 −2
Original line number Diff line number Diff line
@@ -2092,14 +2092,15 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
{
	struct ioc_gq *iocg = pd_to_iocg(pd);
	struct ioc *ioc = iocg->ioc;
	unsigned long flags;

	if (ioc) {
		spin_lock(&ioc->lock);
		spin_lock_irqsave(&ioc->lock, flags);
		if (!list_empty(&iocg->active_list)) {
			propagate_active_weight(iocg, 0, 0);
			list_del_init(&iocg->active_list);
		}
		spin_unlock(&ioc->lock);
		spin_unlock_irqrestore(&ioc->lock, flags);

		hrtimer_cancel(&iocg->waitq_timer);
		hrtimer_cancel(&iocg->delay_timer);
+11 −6
Original line number Diff line number Diff line
@@ -137,6 +137,7 @@ void blk_stat_add_callback(struct request_queue *q,
			   struct blk_stat_callback *cb)
{
	unsigned int bucket;
	unsigned long flags;
	int cpu;

	for_each_possible_cpu(cpu) {
@@ -147,20 +148,22 @@ void blk_stat_add_callback(struct request_queue *q,
			blk_rq_stat_init(&cpu_stat[bucket]);
	}

	spin_lock(&q->stats->lock);
	spin_lock_irqsave(&q->stats->lock, flags);
	list_add_tail_rcu(&cb->list, &q->stats->callbacks);
	blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock(&q->stats->lock);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}

void blk_stat_remove_callback(struct request_queue *q,
			      struct blk_stat_callback *cb)
{
	spin_lock(&q->stats->lock);
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	list_del_rcu(&cb->list);
	if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock(&q->stats->lock);
	spin_unlock_irqrestore(&q->stats->lock, flags);

	del_timer_sync(&cb->timer);
}
@@ -183,10 +186,12 @@ void blk_stat_free_callback(struct blk_stat_callback *cb)

void blk_stat_enable_accounting(struct request_queue *q)
{
	spin_lock(&q->stats->lock);
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	q->stats->enable_accounting = true;
	blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock(&q->stats->lock);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);

+22 −15
Original line number Diff line number Diff line
@@ -278,6 +278,15 @@ static void hd_struct_free_work(struct work_struct *work)
{
	struct hd_struct *part =
		container_of(to_rcu_work(work), struct hd_struct, rcu_work);
	struct gendisk *disk = part_to_disk(part);

	/*
	 * Release the disk reference acquired in delete_partition here.
	 * We can't release it in hd_struct_free because the final put_device
	 * needs process context and thus can't be run directly from a
	 * percpu_ref ->release handler.
	 */
	put_device(disk_to_dev(disk));

	part->start_sect = 0;
	part->nr_sects = 0;
@@ -293,7 +302,6 @@ static void hd_struct_free(struct percpu_ref *ref)
		rcu_dereference_protected(disk->part_tbl, 1);

	rcu_assign_pointer(ptbl->last_lookup, NULL);
	put_device(disk_to_dev(disk));

	INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
	queue_rcu_work(system_wq, &part->rcu_work);
@@ -524,19 +532,20 @@ int bdev_add_partition(struct block_device *bdev, int partno,
int bdev_del_partition(struct block_device *bdev, int partno)
{
	struct block_device *bdevp;
	struct hd_struct *part;
	int ret = 0;

	part = disk_get_part(bdev->bd_disk, partno);
	if (!part)
		return -ENXIO;
	struct hd_struct *part = NULL;
	int ret;

	ret = -ENOMEM;
	bdevp = bdget(part_devt(part));
	bdevp = bdget_disk(bdev->bd_disk, partno);
	if (!bdevp)
		goto out_put_part;
		return -ENOMEM;

	mutex_lock(&bdevp->bd_mutex);
	mutex_lock_nested(&bdev->bd_mutex, 1);

	ret = -ENXIO;
	part = disk_get_part(bdev->bd_disk, partno);
	if (!part)
		goto out_unlock;

	ret = -EBUSY;
	if (bdevp->bd_openers)
@@ -545,15 +554,13 @@ int bdev_del_partition(struct block_device *bdev, int partno)
	sync_blockdev(bdevp);
	invalidate_bdev(bdevp);

	mutex_lock_nested(&bdev->bd_mutex, 1);
	delete_partition(bdev->bd_disk, part);
	mutex_unlock(&bdev->bd_mutex);

	ret = 0;
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	mutex_unlock(&bdevp->bd_mutex);
	bdput(bdevp);
out_put_part:
	if (part)
		disk_put_part(part);
	return ret;
}
+45 −11
Original line number Diff line number Diff line
@@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_mq_unfreeze_queue(disk->queue);
}

/*
 * Report whether @disk is still being scanned for the first time.
 *
 * nvme_alloc_ns() scans the disk prior to adding it, so a disk whose
 * GENHD_FL_UP flag is still clear is treated as being in its first scan.
 */
static inline bool nvme_first_scan(struct gendisk *disk)
{
	return (disk->flags & GENHD_FL_UP) == 0;
}

/*
 * Derive an I/O boundary for @ns and, if it is usable, install it as the
 * queue's chunk size via blk_queue_chunk_sectors().
 *
 * The boundary is ctrl->max_hw_sectors when the controller carries
 * NVME_QUIRK_STRIPE_SIZE and that value is a power of two; otherwise it is
 * id->noiob converted with nvme_lba_to_sect().  Nothing is configured when
 * the boundary is zero, is not a power of two, or the disk's queue is zoned;
 * the latter two cases are logged, but only while nvme_first_scan() reports
 * true for the disk.
 */
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	u32 boundary;

	if (!(ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) ||
	    !is_power_of_2(ctrl->max_hw_sectors))
		boundary = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
	else
		boundary = ctrl->max_hw_sectors;

	/* No boundary reported: leave the queue limits untouched. */
	if (boundary == 0)
		return;

	if (!is_power_of_2(boundary)) {
		/* Warn only during the first scan of this disk. */
		if (nvme_first_scan(ns->disk))
			pr_warn("%s: ignoring unaligned IO boundary:%u\n",
				ns->disk->disk_name, boundary);
		return;
	}

	if (!blk_queue_is_zoned(ns->disk->queue)) {
		blk_queue_chunk_sectors(ns->queue, boundary);
		return;
	}

	/* Zoned queue: the boundary is dropped rather than applied. */
	if (nvme_first_scan(ns->disk))
		pr_warn("%s: ignoring zoned namespace IO boundary\n",
			ns->disk->disk_name);
}

static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
	unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	struct nvme_ns *ns = disk->private_data;
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;
	u32 iob;

	/*
	 * If identify namespace failed, use default 512 byte block size so
@@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
		return -ENODEV;
	}

	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ctrl->max_hw_sectors))
		iob = ctrl->max_hw_sectors;
	else
		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

	ns->features = 0;
	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	/* the PI implementation requires metadata equal t10 pi tuple size */
@@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
		}
	}

	if (iob && !blk_queue_is_zoned(ns->queue))
		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
	nvme_set_chunk_sectors(ns, id);
	nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
	if (ns->head->disk) {
@@ -3676,6 +3705,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
		return 0;
	if (a == &dev_attr_hostid.attr && !ctrl->opts)
		return 0;
	if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts)
		return 0;
	if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
		return 0;

	return a->mode;
}
@@ -4390,7 +4423,7 @@ static void nvme_free_ctrl(struct device *dev)
	struct nvme_subsystem *subsys = ctrl->subsys;
	struct nvme_cel *cel, *next;

	if (subsys && ctrl->instance != subsys->instance)
	if (!subsys || ctrl->instance != subsys->instance)
		ida_simple_remove(&nvme_instance_ida, ctrl->instance);

	list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
@@ -4534,7 +4567,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);

void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
	struct nvme_ns *ns;

@@ -4545,6 +4578,7 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
			break;
	}
	up_read(&ctrl->namespaces_rwsem);
	return timeout;
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);

Loading