Commit 31990f0f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - a series that fixes some old memory allocation issues in libceph
     (myself). We no longer allocate memory in places where allocation
     failures cannot be handled and BUG when the allocation fails.

   - support for copy_file_range() syscall (Luis Henriques). If size and
     alignment conditions are met, it leverages RADOS copy-from
     operation. Otherwise, a local copy is performed.

   - a patch that reduces memory requirement of ceph_sync_read() from
     the size of the entire read to the size of one object (Zheng Yan).

   - fallocate() syscall is now restricted to FALLOC_FL_PUNCH_HOLE (Luis
     Henriques)"

* tag 'ceph-for-4.20-rc1' of git://github.com/ceph/ceph-client: (25 commits)
  ceph: new mount option to disable usage of copy-from op
  ceph: support copy_file_range file operation
  libceph: support the RADOS copy-from operation
  ceph: add non-blocking parameter to ceph_try_get_caps()
  libceph: check reply num_data_items in setup_request_data()
  libceph: preallocate message data items
  libceph, rbd, ceph: move ceph_osdc_alloc_messages() calls
  libceph: introduce alloc_watch_request()
  libceph: assign cookies in linger_submit()
  libceph: enable fallback to ceph_msg_new() in ceph_msgpool_get()
  ceph: num_ops is off by one in ceph_aio_retry_work()
  libceph: no need to call osd_req_opcode_valid() in osd_req_encode_op()
  ceph: set timeout conditionally in __cap_delay_requeue
  libceph: don't consume a ref on pagelist in ceph_msg_data_add_pagelist()
  libceph: introduce ceph_pagelist_alloc()
  libceph: osd_req_op_cls_init() doesn't need to take opcode
  libceph: bump CEPH_MSG_MAX_DATA_LEN
  ceph: only allow punch hole mode in fallocate
  ceph: refactor ceph_sync_read()
  ceph: check if LOOKUPNAME request was aborted when filling trace
  ...
parents a9ac6cc4 ea4cdc54
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -151,6 +151,11 @@ Mount Options
        Report overall filesystem usage in statfs instead of using the root
        directory quota.

  nocopyfrom
        Don't use the RADOS 'copy-from' operation to perform remote object
        copies.  Currently, it's only used in copy_file_range, which will revert
        to the default VFS implementation if this option is used.

More Information
================

+16 −12
Original line number Diff line number Diff line
@@ -1500,9 +1500,6 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
		goto err_req;

	if (ceph_osdc_alloc_messages(req, GFP_NOIO))
		goto err_req;

	return req;

err_req:
@@ -1945,6 +1942,10 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
		}
		if (ret)
			return ret;

		ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
		if (ret)
			return ret;
	}

	return 0;
@@ -2374,8 +2375,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
	if (!obj_req->osd_req)
		return -ENOMEM;

	ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
				  "copyup");
	ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
	if (ret)
		return ret;

@@ -2405,6 +2405,10 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
		rbd_assert(0);
	}

	ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
	if (ret)
		return ret;

	rbd_obj_request_submit(obj_req);
	return 0;
}
@@ -3784,10 +3788,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
	ceph_oloc_copy(&req->r_base_oloc, oloc);
	req->r_flags = CEPH_OSD_FLAG_READ;

	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
	if (ret)
		goto out_req;

	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
	if (IS_ERR(pages)) {
		ret = PTR_ERR(pages);
@@ -3798,6 +3798,10 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
	osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
					 true);

	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
	if (ret)
		goto out_req;

	ceph_osdc_start_request(osdc, req, false);
	ret = ceph_osdc_wait_request(osdc, req);
	if (ret >= 0)
@@ -6067,7 +6071,7 @@ static ssize_t rbd_remove_single_major(struct bus_type *bus,
 * create control files in sysfs
 * /sys/bus/rbd/...
 */
static int rbd_sysfs_init(void)
static int __init rbd_sysfs_init(void)
{
	int ret;

@@ -6082,13 +6086,13 @@ static int rbd_sysfs_init(void)
	return ret;
}

static void rbd_sysfs_cleanup(void)
static void __exit rbd_sysfs_cleanup(void)
{
	bus_unregister(&rbd_bus_type);
	device_unregister(&rbd_root_dev);
}

static int rbd_slab_init(void)
static int __init rbd_slab_init(void)
{
	rbd_assert(!rbd_img_request_cache);
	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
+6 −7
Original line number Diff line number Diff line
@@ -104,6 +104,11 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
	struct timespec64 old_ctime = inode->i_ctime;
	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;

	if (ceph_snap(inode) != CEPH_NOSNAP) {
		ret = -EROFS;
		goto out;
	}

	switch (type) {
	case ACL_TYPE_ACCESS:
		name = XATTR_NAME_POSIX_ACL_ACCESS;
@@ -138,11 +143,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
			goto out_free;
	}

	if (ceph_snap(inode) != CEPH_NOSNAP) {
		ret = -EROFS;
		goto out_free;
	}

	if (new_mode != old_mode) {
		newattrs.ia_ctime = current_time(inode);
		newattrs.ia_mode = new_mode;
@@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
	tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
	if (!tmp_buf)
		goto out_err;
	pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL);
	pagelist = ceph_pagelist_alloc(GFP_KERNEL);
	if (!pagelist)
		goto out_err;
	ceph_pagelist_init(pagelist);

	err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
	if (err)
+1 −1
Original line number Diff line number Diff line
@@ -322,7 +322,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
		/* caller of readpages does not hold buffer and read caps
		 * (fadvise, madvise and readahead cases) */
		int want = CEPH_CAP_FILE_CACHE;
		ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
		ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
		if (ret < 0) {
			dout("start_read %p, error getting cap\n", inode);
		} else if (!(got & want)) {
+12 −9
Original line number Diff line number Diff line
@@ -519,9 +519,9 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 *    -> we take mdsc->cap_delay_lock
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				struct ceph_inode_info *ci)
				struct ceph_inode_info *ci,
				bool set_timeout)
{
	__cap_set_timeouts(mdsc, ci);
	dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
	     ci->i_ceph_flags, ci->i_hold_caps_max);
	if (!mdsc->stopping) {
@@ -531,6 +531,8 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				goto no_change;
			list_del_init(&ci->i_cap_delay_list);
		}
		if (set_timeout)
			__cap_set_timeouts(mdsc, ci);
		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
no_change:
		spin_unlock(&mdsc->cap_delay_lock);
@@ -720,7 +722,7 @@ void ceph_add_cap(struct inode *inode,
		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
		     ceph_cap_string(issued), ceph_cap_string(wanted),
		     ceph_cap_string(actual_wanted));
		__cap_delay_requeue(mdsc, ci);
		__cap_delay_requeue(mdsc, ci, true);
	}

	if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1647,7 +1649,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
	    (mask & CEPH_CAP_FILE_BUFFER))
		dirty |= I_DIRTY_DATASYNC;
	__cap_delay_requeue(mdsc, ci);
	__cap_delay_requeue(mdsc, ci, true);
	return dirty;
}

@@ -2065,7 +2067,7 @@ ack:

	/* Reschedule delayed caps release if we delayed anything */
	if (delayed)
		__cap_delay_requeue(mdsc, ci);
		__cap_delay_requeue(mdsc, ci, false);

	spin_unlock(&ci->i_ceph_lock);

@@ -2125,7 +2127,7 @@ retry:

		if (delayed) {
			spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci);
			__cap_delay_requeue(mdsc, ci, true);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else {
@@ -2671,17 +2673,18 @@ static void check_max_size(struct inode *inode, loff_t endoff)
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
}

int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int *got)
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
		      bool nonblock, int *got)
{
	int ret, err = 0;

	BUG_ON(need & ~CEPH_CAP_FILE_RD);
	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
	ret = ceph_pool_perm_check(ci, need);
	if (ret < 0)
		return ret;

	ret = try_get_cap_refs(ci, need, want, 0, true, got, &err);
	ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
	if (ret) {
		if (err == -EAGAIN) {
			ret = 0;
Loading