Commit 7a62d0f0 authored by Nikolay Borisov, committed by David Sterba
Browse files

btrfs: Handle one more split-brain scenario during fsid change



This commit continues hardening the scanning code to handle cases where
power loss could have caused disks in a multi-disk filesystem to be
in an inconsistent state. Namely, handle the situation that can occur when
some of the disks in a multi-disk fs have completed their fsid change, i.e.
they have the METADATA_UUID incompat flag set, have cleared the
CHANGING_FSID_V2 flag and their fsid/metadata_uuid are different. At
the same time the other half of the disks will have their
fsid/metadata_uuid unchanged and will only have the CHANGING_FSID_V2 flag.

This is handled by introducing code in the scan path which:

 a) Handles the case when a device with the CHANGING_FSID_V2 flag is
 scanned and as a result a btrfs_fs_devices is created with matching
 fsid/metadata_uuid. Subsequently, when a device with a completed fsid
 change is scanned, it will detect this via the new code in find_fsid,
 i.e. that such an fs_devices exists whose fsid_change flag is set to true,
 its metadata_uuid/fsid match and the metadata_uuid of the scanned
 device matches that of the fs_devices. In this case, it's important to
 note that the device which has its fsid change completed will have a
 higher generation number than the device with the CHANGING_FSID_V2 flag
 set, so its superblock will be used during mount. To prevent an
 assertion triggering because the sb used for mounting will have
 differing fsid/metadata_uuid than the ones in the fs_devices struct,
 also add code in device_list_add which overwrites the values in
 fs_devices.

 b) Alternatively, a device that completed its fsid change may be
 scanned first, which will create the respective
 btrfs_fs_devices struct with differing fsid/metadata_uuid. In this
 case, when a device with the CHANGING_FSID_V2 flag set is scanned it will
 call the newly added find_fsid_inprogress function, which will return
 the correct fs_devices.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent d1a63002
Loading
Loading
Loading
Loading
+73 −4
Original line number Diff line number Diff line
@@ -389,6 +389,25 @@ static noinline struct btrfs_fs_devices *find_fsid(

	ASSERT(fsid);

	if (metadata_fsid) {
		/*
		 * Handle scanned device having completed its fsid change but
		 * belonging to a fs_devices that was created by first scanning
		 * a device which didn't have its fsid/metadata_uuid changed
		 * at all and the CHANGING_FSID_V2 flag set.
		 */
		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
			if (fs_devices->fsid_change &&
			    memcmp(metadata_fsid, fs_devices->fsid,
				   BTRFS_FSID_SIZE) == 0 &&
			    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
				   BTRFS_FSID_SIZE) == 0) {
				return fs_devices;
			}
		}
	}

	/* Handle non-split brain cases */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (metadata_fsid) {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
@@ -774,6 +793,27 @@ error_brelse:
	return -EINVAL;
}

/*
 * Find the fs_devices that a scanned device with CHANGING_FSID_V2 set belongs
 * to when that fs_devices was created from a disk which had already completed
 * its fsid change.
 *
 * A candidate must have an fsid and metadata_uuid that differ from each other,
 * must not be marked fsid_change itself, and its metadata_uuid must equal the
 * fsid stored in @disk_super.
 *
 * Returns the first matching entry on fs_uuids, or NULL if there is none.
 */
static struct btrfs_fs_devices *find_fsid_inprogress(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *cur;

	list_for_each_entry(cur, &fs_uuids, fs_list) {
		/* Entries that are themselves flagged fsid_change never match */
		if (cur->fsid_change)
			continue;

		/* Identical fsid and metadata_uuid: not a candidate */
		if (memcmp(cur->metadata_uuid, cur->fsid, BTRFS_FSID_SIZE) == 0)
			continue;

		/* The scanned device still carries the old fsid */
		if (memcmp(cur->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0)
			return cur;
	}

	return NULL;
}

/*
 * Add new device to list of registered devices
 *
@@ -786,7 +826,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
			   bool *new_device_added)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_fs_devices *fs_devices = NULL;
	struct rcu_string *name;
	u64 found_transid = btrfs_super_generation(disk_super);
	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -795,11 +835,25 @@ static noinline struct btrfs_device *device_list_add(const char *path,
	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);

	if (has_metadata_uuid)
		fs_devices = find_fsid(disk_super->fsid, disk_super->metadata_uuid);
	else
	if (fsid_change_in_progress && !has_metadata_uuid) {
		/*
		 * When we have an image which has CHANGING_FSID_V2 set it might
		 * belong to either a filesystem which has disks with completed
		 * fsid change or it might belong to fs with no UUID changes in
		 * effect, handle both.
		 */
		fs_devices = find_fsid_inprogress(disk_super);
		if (!fs_devices)
			fs_devices = find_fsid(disk_super->fsid, NULL);

	} else if (has_metadata_uuid) {
		fs_devices = find_fsid(disk_super->fsid,
				       disk_super->metadata_uuid);
	} else {
		fs_devices = find_fsid(disk_super->fsid, NULL);
	}


	if (!fs_devices) {
		if (has_metadata_uuid)
			fs_devices = alloc_fs_devices(disk_super->fsid,
@@ -820,6 +874,21 @@ static noinline struct btrfs_device *device_list_add(const char *path,
		mutex_lock(&fs_devices->device_list_mutex);
		device = find_device(fs_devices, devid,
				disk_super->dev_item.uuid);

		/*
		 * If this disk has been pulled into an fs devices created by
		 * a device which had the CHANGING_FSID_V2 flag then replace the
		 * metadata_uuid/fsid values of the fs_devices.
		 */
		if (has_metadata_uuid && fs_devices->fsid_change &&
		    found_transid > fs_devices->latest_generation) {
			memcpy(fs_devices->fsid, disk_super->fsid,
					BTRFS_FSID_SIZE);
			memcpy(fs_devices->metadata_uuid,
					disk_super->metadata_uuid, BTRFS_FSID_SIZE);

			fs_devices->fsid_change = false;
		}
	}

	if (!device) {