Commit a12ed06b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-4.19-rc3' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "Two rbd patches to complete support for images within namespaces that
  went into -rc1 and a use-after-free fix.

  The rbd changes have been sitting in a branch for quite a while but
  couldn't be included into the -rc1 pull request because of a pending
  wire protocol backwards compatibility fixup that only got committed
  early this week"

* tag 'ceph-for-4.19-rc3' of https://github.com/ceph/ceph-client:
  rbd: support cloning across namespaces
  rbd: factor out get_parent_info()
  ceph: avoid a use-after-free in ceph_destroy_options()
parents d042a240 e92c0eaf
Loading
Loading
Loading
Loading
+178 −57
Original line number Diff line number Diff line
@@ -4207,11 +4207,13 @@ static ssize_t rbd_parent_show(struct device *dev,

		count += sprintf(&buf[count], "%s"
			    "pool_id %llu\npool_name %s\n"
			    "pool_ns %s\n"
			    "image_id %s\nimage_name %s\n"
			    "snap_id %llu\nsnap_name %s\n"
			    "overlap %llu\n",
			    !count ? "" : "\n", /* first? */
			    spec->pool_id, spec->pool_name,
			    spec->pool_ns ?: "",
			    spec->image_id, spec->image_name ?: "(unknown)",
			    spec->snap_id, spec->snap_name,
			    rbd_dev->parent_overlap);
@@ -4584,47 +4586,177 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
						&rbd_dev->header.features);
}

static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
{
	struct rbd_spec *parent_spec;
	size_t size;
	void *reply_buf = NULL;
	__le64 snapid;
	void *p;
	void *end;
struct parent_image_info {
	u64		pool_id;
	char *image_id;
	const char	*pool_ns;
	const char	*image_id;
	u64		snap_id;

	bool		has_overlap;
	u64		overlap;
};

/*
 * The caller is responsible for @pii.
 */
static int decode_parent_image_spec(void **p, void *end,
				    struct parent_image_info *pii)
{
	u8 struct_v;
	u32 struct_len;
	int ret;

	ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
				  &struct_v, &struct_len);
	if (ret)
		return ret;

	ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
	pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
	if (IS_ERR(pii->pool_ns)) {
		ret = PTR_ERR(pii->pool_ns);
		pii->pool_ns = NULL;
		return ret;
	}
	pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
	if (IS_ERR(pii->image_id)) {
		ret = PTR_ERR(pii->image_id);
		pii->image_id = NULL;
		return ret;
	}
	ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
	return 0;

e_inval:
	return -EINVAL;
}

static int __get_parent_info(struct rbd_device *rbd_dev,
			     struct page *req_page,
			     struct page *reply_page,
			     struct parent_image_info *pii)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	size_t reply_len = PAGE_SIZE;
	void *p, *end;
	int ret;

	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
			     "rbd", "parent_get", CEPH_OSD_FLAG_READ,
			     req_page, sizeof(u64), reply_page, &reply_len);
	if (ret)
		return ret == -EOPNOTSUPP ? 1 : ret;

	p = page_address(reply_page);
	end = p + reply_len;
	ret = decode_parent_image_spec(&p, end, pii);
	if (ret)
		return ret;

	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
			     "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
			     req_page, sizeof(u64), reply_page, &reply_len);
	if (ret)
		return ret;

	p = page_address(reply_page);
	end = p + reply_len;
	ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
	if (pii->has_overlap)
		ceph_decode_64_safe(&p, end, pii->overlap, e_inval);

	return 0;

e_inval:
	return -EINVAL;
}

/*
 * The caller is responsible for @pii.
 */
static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
				    struct page *req_page,
				    struct page *reply_page,
				    struct parent_image_info *pii)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	size_t reply_len = PAGE_SIZE;
	void *p, *end;
	int ret;

	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
			     "rbd", "get_parent", CEPH_OSD_FLAG_READ,
			     req_page, sizeof(u64), reply_page, &reply_len);
	if (ret)
		return ret;

	p = page_address(reply_page);
	end = p + reply_len;
	ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
	pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
	if (IS_ERR(pii->image_id)) {
		ret = PTR_ERR(pii->image_id);
		pii->image_id = NULL;
		return ret;
	}
	ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
	pii->has_overlap = true;
	ceph_decode_64_safe(&p, end, pii->overlap, e_inval);

	return 0;

e_inval:
	return -EINVAL;
}

static int get_parent_info(struct rbd_device *rbd_dev,
			   struct parent_image_info *pii)
{
	struct page *req_page, *reply_page;
	void *p;
	int ret;

	req_page = alloc_page(GFP_KERNEL);
	if (!req_page)
		return -ENOMEM;

	reply_page = alloc_page(GFP_KERNEL);
	if (!reply_page) {
		__free_page(req_page);
		return -ENOMEM;
	}

	p = page_address(req_page);
	ceph_encode_64(&p, rbd_dev->spec->snap_id);
	ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
	if (ret > 0)
		ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
					       pii);

	__free_page(req_page);
	__free_page(reply_page);
	return ret;
}

static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
{
	struct rbd_spec *parent_spec;
	struct parent_image_info pii = { 0 };
	int ret;

	parent_spec = rbd_spec_alloc();
	if (!parent_spec)
		return -ENOMEM;

	size = sizeof (__le64) +				/* pool_id */
		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
		sizeof (__le64) +				/* snap_id */
		sizeof (__le64);				/* overlap */
	reply_buf = kmalloc(size, GFP_KERNEL);
	if (!reply_buf) {
		ret = -ENOMEM;
	ret = get_parent_info(rbd_dev, &pii);
	if (ret)
		goto out_err;
	}

	snapid = cpu_to_le64(rbd_dev->spec->snap_id);
	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
				  &rbd_dev->header_oloc, "get_parent",
				  &snapid, sizeof(snapid), reply_buf, size);
	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
	if (ret < 0)
		goto out_err;
	dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
	     __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
	     pii.has_overlap, pii.overlap);

	p = reply_buf;
	end = reply_buf + ret;
	ret = -ERANGE;
	ceph_decode_64_safe(&p, end, pool_id, out_err);
	if (pool_id == CEPH_NOPOOL) {
	if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
		/*
		 * Either the parent never existed, or we have
		 * record of it but the image got flattened so it no
@@ -4633,6 +4765,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
		 * overlap to 0.  The effect of this is that all new
		 * requests will be treated as if the image had no
		 * parent.
		 *
		 * If !pii.has_overlap, the parent image spec is not
		 * applicable.  It's there to avoid duplication in each
		 * snapshot record.
		 */
		if (rbd_dev->parent_overlap) {
			rbd_dev->parent_overlap = 0;
@@ -4647,51 +4783,36 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
	/* The ceph file layout needs to fit pool id in 32 bits */

	ret = -EIO;
	if (pool_id > (u64)U32_MAX) {
	if (pii.pool_id > (u64)U32_MAX) {
		rbd_warn(NULL, "parent pool id too large (%llu > %u)",
			(unsigned long long)pool_id, U32_MAX);
			(unsigned long long)pii.pool_id, U32_MAX);
		goto out_err;
	}

	image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
	if (IS_ERR(image_id)) {
		ret = PTR_ERR(image_id);
		goto out_err;
	}
	ceph_decode_64_safe(&p, end, snap_id, out_err);
	ceph_decode_64_safe(&p, end, overlap, out_err);

	/*
	 * The parent won't change (except when the clone is
	 * flattened, already handled that).  So we only need to
	 * record the parent spec we have not already done so.
	 */
	if (!rbd_dev->parent_spec) {
		parent_spec->pool_id = pool_id;
		parent_spec->image_id = image_id;
		parent_spec->snap_id = snap_id;

		/* TODO: support cloning across namespaces */
		if (rbd_dev->spec->pool_ns) {
			parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
						       GFP_KERNEL);
			if (!parent_spec->pool_ns) {
				ret = -ENOMEM;
				goto out_err;
			}
		parent_spec->pool_id = pii.pool_id;
		if (pii.pool_ns && *pii.pool_ns) {
			parent_spec->pool_ns = pii.pool_ns;
			pii.pool_ns = NULL;
		}
		parent_spec->image_id = pii.image_id;
		pii.image_id = NULL;
		parent_spec->snap_id = pii.snap_id;

		rbd_dev->parent_spec = parent_spec;
		parent_spec = NULL;	/* rbd_dev now owns this */
	} else {
		kfree(image_id);
	}

	/*
	 * We always update the parent overlap.  If it's zero we issue
	 * a warning, as we will proceed as if there was no parent.
	 */
	if (!overlap) {
	if (!pii.overlap) {
		if (parent_spec) {
			/* refresh, careful to warn just once */
			if (rbd_dev->parent_overlap)
@@ -4702,14 +4823,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
			rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
		}
	}
	rbd_dev->parent_overlap = overlap;
	rbd_dev->parent_overlap = pii.overlap;

out:
	ret = 0;
out_err:
	kfree(reply_buf);
	kfree(pii.pool_ns);
	kfree(pii.image_id);
	rbd_spec_put(parent_spec);

	return ret;
}

+11 −5
Original line number Diff line number Diff line
@@ -602,6 +602,8 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)

/*
 * create a new fs client
 *
 * Success or not, this function consumes @fsopt and @opt.
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
					struct ceph_options *opt)
@@ -609,17 +611,20 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
	struct ceph_fs_client *fsc;
	int page_count;
	size_t size;
	int err = -ENOMEM;
	int err;

	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
	if (!fsc)
		return ERR_PTR(-ENOMEM);
	if (!fsc) {
		err = -ENOMEM;
		goto fail;
	}

	fsc->client = ceph_create_client(opt, fsc);
	if (IS_ERR(fsc->client)) {
		err = PTR_ERR(fsc->client);
		goto fail;
	}
	opt = NULL; /* fsc->client now owns this */

	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
	fsc->client->osdc.abort_on_full = true;
@@ -677,6 +682,9 @@ fail_client:
	ceph_destroy_client(fsc->client);
fail:
	kfree(fsc);
	if (opt)
		ceph_destroy_options(opt);
	destroy_mount_options(fsopt);
	return ERR_PTR(err);
}

@@ -1042,8 +1050,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
	fsc = create_fs_client(fsopt, opt);
	if (IS_ERR(fsc)) {
		res = ERR_CAST(fsc);
		destroy_mount_options(fsopt);
		ceph_destroy_options(opt);
		goto out_final;
	}