Commit 7e5192b9 authored by Linus Torvalds
Browse files

Merge tag 'for-5.5/disk-revalidate-20191122' of git://git.kernel.dk/linux-block

Pull disk revalidation updates from Jens Axboe:
 "This continues the work that Jan Kara started to thoroughly cleanup
  and consolidate how we handle rescans and revalidations"

* tag 'for-5.5/disk-revalidate-20191122' of git://git.kernel.dk/linux-block:
  block: move clearing bd_invalidated into check_disk_size_change
  block: remove (__)blkdev_reread_part as an exported API
  block: fix bdev_disk_changed for non-partitioned devices
  block: move rescan_partitions to fs/block_dev.c
  block: merge invalidate_partitions into rescan_partitions
  block: refactor rescan_partitions
parents 464a47f4 979c690d
Loading
Loading
Loading
Loading
+5 −32
Original line number Diff line number Diff line
@@ -155,48 +155,21 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
	}
}

/*
 * This is an exported API for the block driver, and will not
 * acquire bd_mutex. This API should be used in case that
 * caller has held bd_mutex already.
 */
int __blkdev_reread_part(struct block_device *bdev)
static int blkdev_reread_part(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
	if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains)
		return -EINVAL;
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	lockdep_assert_held(&bdev->bd_mutex);

	return rescan_partitions(disk, bdev);
}
EXPORT_SYMBOL(__blkdev_reread_part);

/*
 * This is an exported API for the block driver, and will
 * try to acquire bd_mutex. If bd_mutex has been held already
 * in current context, please call __blkdev_reread_part().
 *
 * Make sure the held locks in current context aren't required
 * in open()/close() handler and I/O path for avoiding ABBA deadlock:
 * - bd_mutex is held before calling block driver's open/close
 *   handler
 * - reading partition table may submit I/O to the block device
 */
int blkdev_reread_part(struct block_device *bdev)
{
	int res;

	mutex_lock(&bdev->bd_mutex);
	res = __blkdev_reread_part(bdev);
	ret = bdev_disk_changed(bdev, false);
	mutex_unlock(&bdev->bd_mutex);

	return res;
	return ret;
}
EXPORT_SYMBOL(blkdev_reread_part);

static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
		unsigned long arg, unsigned long flags)
+79 −101
Original line number Diff line number Diff line
@@ -442,12 +442,14 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
	}
}

static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct disk_part_iter piter;
	struct hd_struct *part;
	int res;

	if (!disk_part_scan_enabled(disk))
		return 0;
	if (bdev->bd_part_count || bdev->bd_super)
		return -EBUSY;
	res = invalidate_partition(disk, 0);
@@ -462,148 +464,124 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
	return 0;
}

int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
		struct parsed_partitions *state, int p)
{
	struct parsed_partitions *state = NULL;
	sector_t size = state->parts[p].size;
	sector_t from = state->parts[p].from;
	struct hd_struct *part;
	int p, highest, res;
rescan:
	if (state && !IS_ERR(state)) {
		free_partitions(state);
		state = NULL;

	if (!size)
		return true;

	if (from >= get_capacity(disk)) {
		printk(KERN_WARNING
		       "%s: p%d start %llu is beyond EOD, ",
		       disk->disk_name, p, (unsigned long long) from);
		if (disk_unlock_native_capacity(disk))
			return false;
		return true;
	}

	res = drop_partitions(disk, bdev);
	if (res)
		return res;
	if (from + size > get_capacity(disk)) {
		printk(KERN_WARNING
		       "%s: p%d size %llu extends beyond EOD, ",
		       disk->disk_name, p, (unsigned long long) size);

		if (disk_unlock_native_capacity(disk))
			return false;

		/*
		 * We can not ignore partitions of broken tables created by for
		 * example camera firmware, but we limit them to the end of the
		 * disk to avoid creating invalid block devices.
		 */
		size = get_capacity(disk) - from;
	}

	part = add_partition(disk, p, from, size, state->parts[p].flags,
			     &state->parts[p].info);
	if (IS_ERR(part)) {
		printk(KERN_ERR " %s: p%d could not be added: %ld\n",
		       disk->disk_name, p, -PTR_ERR(part));
		return true;
	}

#ifdef CONFIG_BLK_DEV_MD
	if (state->parts[p].flags & ADDPART_FLAG_RAID)
		md_autodetect_dev(part_to_dev(part)->devt);
#endif
	return true;
}

int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct parsed_partitions *state;
	int ret = -EAGAIN, p, highest;

	if (!disk_part_scan_enabled(disk))
		return 0;

	if (disk->fops->revalidate_disk)
		disk->fops->revalidate_disk(disk);
	check_disk_size_change(disk, bdev, true);
	bdev->bd_invalidated = 0;
	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
	state = check_partition(disk, bdev);
	if (!state)
		return 0;
	if (IS_ERR(state)) {
		/*
		 * I/O error reading the partition table.  If any
		 * partition code tried to read beyond EOD, retry
		 * after unlocking native capacity.
		 * I/O error reading the partition table.  If we tried to read
		 * beyond EOD, retry after unlocking the native capacity.
		 */
		if (PTR_ERR(state) == -ENOSPC) {
			printk(KERN_WARNING "%s: partition table beyond EOD, ",
			       disk->disk_name);
			if (disk_unlock_native_capacity(disk))
				goto rescan;
				return -EAGAIN;
		}
		return -EIO;
	}

	/* Partitions are not supported on zoned block devices */
	/*
	 * Partitions are not supported on zoned block devices.
	 */
	if (bdev_is_zoned(bdev)) {
		pr_warn("%s: ignoring partition table on zoned block device\n",
			disk->disk_name);
		goto out;
		ret = 0;
		goto out_free_state;
	}

	/*
	 * If any partition code tried to read beyond EOD, try
	 * unlocking native capacity even if partition table is
	 * successfully read as we could be missing some partitions.
	 * If we read beyond EOD, try unlocking native capacity even if the
	 * partition table was successfully read as we could be missing some
	 * partitions.
	 */
	if (state->access_beyond_eod) {
		printk(KERN_WARNING
		       "%s: partition table partially beyond EOD, ",
		       disk->disk_name);
		if (disk_unlock_native_capacity(disk))
			goto rescan;
			goto out_free_state;
	}

	/* tell userspace that the media / partition table may have changed */
	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

	/* Detect the highest partition number and preallocate
	 * disk->part_tbl.  This is an optimization and not strictly
	 * necessary.
	/*
	 * Detect the highest partition number and preallocate disk->part_tbl.
	 * This is an optimization and not strictly necessary.
	 */
	for (p = 1, highest = 0; p < state->limit; p++)
		if (state->parts[p].size)
			highest = p;

	disk_expand_part_tbl(disk, highest);

	/* add partitions */
	for (p = 1; p < state->limit; p++) {
		sector_t size, from;

		size = state->parts[p].size;
		if (!size)
			continue;

		from = state->parts[p].from;
		if (from >= get_capacity(disk)) {
			printk(KERN_WARNING
			       "%s: p%d start %llu is beyond EOD, ",
			       disk->disk_name, p, (unsigned long long) from);
			if (disk_unlock_native_capacity(disk))
				goto rescan;
			continue;
		}

		if (from + size > get_capacity(disk)) {
			printk(KERN_WARNING
			       "%s: p%d size %llu extends beyond EOD, ",
			       disk->disk_name, p, (unsigned long long) size);

			if (disk_unlock_native_capacity(disk)) {
				/* free state and restart */
				goto rescan;
			} else {
				/*
				 * we can not ignore partitions of broken tables
				 * created by for example camera firmware, but
				 * we limit them to the end of the disk to avoid
				 * creating invalid block devices
				 */
				size = get_capacity(disk) - from;
			}
		}
	for (p = 1; p < state->limit; p++)
		if (!blk_add_partition(disk, bdev, state, p))
			goto out_free_state;

		part = add_partition(disk, p, from, size,
				     state->parts[p].flags,
				     &state->parts[p].info);
		if (IS_ERR(part)) {
			printk(KERN_ERR " %s: p%d could not be added: %ld\n",
			       disk->disk_name, p, -PTR_ERR(part));
			continue;
		}
#ifdef CONFIG_BLK_DEV_MD
		if (state->parts[p].flags & ADDPART_FLAG_RAID)
			md_autodetect_dev(part_to_dev(part)->devt);
#endif
	}
out:
	ret = 0;
out_free_state:
	free_partitions(state);
	return 0;
}

int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
{
	int res;

	if (!bdev->bd_invalidated)
		return 0;

	res = drop_partitions(disk, bdev);
	if (res)
		return res;

	set_capacity(disk, 0);
	check_disk_size_change(disk, bdev, false);
	bdev->bd_invalidated = 0;
	/* tell userspace that the media / partition table may have changed */
	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

	return 0;
	return ret;
}

unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+8 −5
Original line number Diff line number Diff line
@@ -640,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
	int rc;

	rc = blkdev_reread_part(bdev);
	mutex_lock(&bdev->bd_mutex);
	rc = bdev_disk_changed(bdev, false);
	mutex_unlock(&bdev->bd_mutex);
	if (rc)
		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -1164,10 +1166,11 @@ out_unlock:
		 * must be at least one and it can only become zero when the
		 * current holder is released.
		 */
		if (release)
			err = __blkdev_reread_part(bdev);
		else
			err = blkdev_reread_part(bdev);
		if (!release)
			mutex_lock(&bdev->bd_mutex);
		err = bdev_disk_changed(bdev, false);
		if (!release)
			mutex_unlock(&bdev->bd_mutex);
		if (err)
			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
				__func__, lo_number, err);
+3 −1
Original line number Diff line number Diff line
@@ -116,7 +116,9 @@ int dasd_scan_partitions(struct dasd_block *block)
		return -ENODEV;
	}

	rc = blkdev_reread_part(bdev);
	mutex_lock(&bdev->bd_mutex);
	rc = bdev_disk_changed(bdev, false);
	mutex_unlock(&bdev->bd_mutex);
	if (rc)
		DBF_DEV_EVENT(DBF_ERR, block->base,
				"scan partitions error, rc %d", rc);
+37 −11
Original line number Diff line number Diff line
@@ -1416,8 +1416,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
 * and adjusts it if it differs. When shrinking the bdev size, its all caches
 * are freed.
 */
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
		bool verbose)
static void check_disk_size_change(struct gendisk *disk,
		struct block_device *bdev, bool verbose)
{
	loff_t disk_size, bdev_size;

@@ -1433,6 +1433,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
		if (bdev_size > disk_size)
			flush_disk(bdev, false);
	}
	bdev->bd_invalidated = 0;
}

/**
@@ -1462,7 +1463,6 @@ int revalidate_disk(struct gendisk *disk)

		mutex_lock(&bdev->bd_mutex);
		check_disk_size_change(disk, bdev, ret == 0);
		bdev->bd_invalidated = 0;
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
	}
@@ -1508,18 +1508,44 @@ EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
	if (disk_part_scan_enabled(bdev->bd_disk)) {
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	lockdep_assert_held(&bdev->bd_mutex);

rescan:
	ret = blk_drop_partitions(disk, bdev);
	if (ret)
		return ret;

	if (invalidate)
			invalidate_partitions(bdev->bd_disk, bdev);
		else
			rescan_partitions(bdev->bd_disk, bdev);
		set_capacity(disk, 0);
	else if (disk->fops->revalidate_disk)
		disk->fops->revalidate_disk(disk);

	check_disk_size_change(disk, bdev, !invalidate);

	if (get_capacity(disk)) {
		ret = blk_add_partitions(disk, bdev);
		if (ret == -EAGAIN)
			goto rescan;
	} else {
		check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
		bdev->bd_invalidated = 0;
		/*
		 * Tell userspace that the media / partition table may have
		 * changed.
		 */
		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
	}

	return ret;
}
/*
 * Only exported for the loop and dasd drivers for historic reasons.  Don't use
 * in new code!
 */
EXPORT_SYMBOL_GPL(bdev_disk_changed);

/*
 * bd_mutex locking:
Loading