Commit 6c6b3549 authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: set the zone size in blk_revalidate_disk_zones atomically



The current zone revalidation code has a major problem in that it
doesn't update the zone size and q->nr_zones atomically, leading
to a short window where an out of bounds access to the zone arrays
is possible.

To fix this, move the setting of the zone size into the critical
section of blk_revalidate_disk_zones so that it gets updated together
with the zone bitmaps and q->nr_zones.  This also slightly simplifies
the callers, as the zone size is now deduced from the report_zones results.

This change also allows checking for a power-of-two zone size in generic
code.

Reported-by: Hans Holmberg <hans@owltronix.com>
Reviewed-by: Javier González <javier@javigon.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ae58954d
Loading
Loading
Loading
Loading
+33 −26
Original line number Diff line number Diff line
@@ -343,6 +343,7 @@ struct blk_revalidate_zone_args {
	unsigned long	*conv_zones_bitmap;
	unsigned long	*seq_zones_wlock;
	unsigned int	nr_zones;
	sector_t	zone_sectors;
	sector_t	sector;
};

@@ -355,26 +356,34 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	struct request_queue *q = disk->queue;
	sector_t zone_sectors = blk_queue_zone_sectors(q);
	sector_t capacity = get_capacity(disk);

	/*
	 * All zones must have the same size, with the exception on an eventual
	 * smaller last zone.
	 */
	if (zone->start + zone_sectors < capacity &&
	    zone->len != zone_sectors) {
	if (zone->start == 0) {
		if (zone->len == 0 || !is_power_of_2(zone->len)) {
			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
				disk->disk_name, zone->len);
			return -ENODEV;
		}

		args->zone_sectors = zone->len;
		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
	} else if (zone->start + args->zone_sectors < capacity) {
		if (zone->len != args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
		return false;
			return -ENODEV;
		}

	if (zone->start + zone->len >= capacity &&
	    zone->len > zone_sectors) {
	} else {
		if (zone->len > args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with larger last zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	}

	/* Check for holes in the zone report */
	if (zone->start != args->sector) {
@@ -428,9 +437,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
	struct request_queue *q = disk->queue;
	struct blk_revalidate_zone_args args = {
		.disk		= disk,
		.nr_zones	= blkdev_nr_zones(disk),
	};
	int ret = 0;
	unsigned int noio_flag;
	int ret;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return -EIO;
@@ -438,24 +447,22 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as
	 * if GFP_NOIO was specified.
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	if (args.nr_zones) {
		unsigned int noio_flag = memalloc_noio_save();

		ret = disk->fops->report_zones(disk, 0, args.nr_zones,
	noio_flag = memalloc_noio_save();
	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
				       blk_revalidate_zone_cb, &args);
	memalloc_noio_restore(noio_flag);
	}

	/*
	 * Install the new bitmaps, making sure the queue is stopped and
	 * all I/Os are completed (i.e. a scheduler is not referencing the
	 * bitmaps).
	 * Install the new bitmaps and update nr_zones only once the queue is
	 * stopped and all I/Os are completed (i.e. a scheduler is not
	 * referencing the bitmaps).
	 */
	blk_mq_freeze_queue(q);
	if (ret >= 0) {
		blk_queue_chunk_sectors(q, args.zone_sectors);
		q->nr_zones = args.nr_zones;
		swap(q->seq_zones_wlock, args.seq_zones_wlock);
		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
+2 −1
Original line number Diff line number Diff line
@@ -1583,6 +1583,8 @@ static int null_gendisk_register(struct nullb *nullb)
			if (ret)
				return ret;
		} else {
			blk_queue_chunk_sectors(nullb->q,
					nullb->dev->zone_size_sects);
			nullb->q->nr_zones = blkdev_nr_zones(disk);
		}
	}
@@ -1746,7 +1748,6 @@ static int null_add_dev(struct nullb_device *dev)
		if (rv)
			goto out_cleanup_blk_queue;

		blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
		nullb->q->limits.zoned = BLK_ZONED_HM;
		blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
		blk_queue_required_elevator_features(nullb->q,
+0 −2
Original line number Diff line number Diff line
@@ -412,8 +412,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
		goto err;

	/* The drive satisfies the kernel restrictions: set it up */
	blk_queue_chunk_sectors(sdkp->disk->queue,
			logical_to_sectors(sdkp->device, zone_blocks));
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
	blk_queue_required_elevator_features(sdkp->disk->queue,
					     ELEVATOR_F_ZBD_SEQ_WRITE);