Commit 95288a9b authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - OSD/MDS latency and caps cache metrics infrastructure for the
     filesystem (Xiubo Li). Currently available through debugfs and will
     be periodically sent to the MDS in the future.

   - support for replica reads (balanced and localized reads) for rbd
     and the filesystem (myself). The default remains to always read
     from primary; users can opt in with the new crush_location and
     read_from_replica options. Note that reading from replica is safe
     for general use only since Octopus.

   - support for RADOS allocation hint flags (myself). Currently used by
     rbd to propagate the compressible/incompressible hint given with
     the new compression_hint map option and ready for passing on more
     advanced hints, e.g. based on fadvise() from the filesystem.

   - support for efficient cross-quota-realm renames (Luis Henriques)

   - assorted cap handling improvements and cleanups, particularly
     untangling some of the locking (Jeff Layton)"

* tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client: (29 commits)
  rbd: compression_hint option
  libceph: support for alloc hint flags
  libceph: read_from_replica option
  libceph: support for balanced and localized reads
  libceph: crush_location infrastructure
  libceph: decode CRUSH device/bucket types and names
  libceph: add non-asserting rbtree insertion helper
  ceph: skip checking caps when session reconnecting and releasing reqs
  ceph: make sure mdsc->mutex is nested in s->s_mutex to fix dead lock
  ceph: don't return -ESTALE if there's still an open file
  libceph, rbd: replace zero-length array with flexible-array
  ceph: allow rename operation under different quota realms
  ceph: normalize 'delta' parameter usage in check_quota_exceeded
  ceph: ceph_kick_flushing_caps needs the s_mutex
  ceph: request expedited service on session's last cap flush
  ceph: convert mdsc->cap_dirty to a per-session list
  ceph: reset i_requested_max_size if file write is not wanted
  ceph: throw a warning if we destroy session with mutex still locked
  ceph: fix potential race in ceph_check_caps
  ceph: document what protects i_dirty_item and i_flushing_item
  ...
parents ca687877 dc1dad8e
Loading
Loading
Loading
Loading
+43 −1
Original line number Diff line number Diff line
@@ -836,6 +836,7 @@ enum {
	Opt_lock_timeout,
	/* int args above */
	Opt_pool_ns,
	Opt_compression_hint,
	/* string args above */
	Opt_read_only,
	Opt_read_write,
@@ -844,8 +845,23 @@ enum {
	Opt_notrim,
};

/*
 * Selectors for the "compression_hint" map option.  The strings that map
 * to these values are listed in rbd_param_compression_hint[], and
 * rbd_parse_param() switches on them to update alloc_hint_flags.
 */
enum {
	Opt_compression_hint_none = 0,		/* "none" */
	Opt_compression_hint_compressible = 1,	/* "compressible" */
	Opt_compression_hint_incompressible = 2,	/* "incompressible" */
};

/*
 * Accepted string values for the "compression_hint" option, each paired
 * with its Opt_compression_hint_* selector.  Passed to the fsparam_enum()
 * entry for "compression_hint" in rbd_parameters[].
 */
static const struct constant_table rbd_param_compression_hint[] = {
	{"none",		Opt_compression_hint_none},
	{"compressible",	Opt_compression_hint_compressible},
	{"incompressible",	Opt_compression_hint_incompressible},
	{}	/* empty trailing entry; presumably the table terminator */
};

static const struct fs_parameter_spec rbd_parameters[] = {
	fsparam_u32	("alloc_size",			Opt_alloc_size),
	fsparam_enum	("compression_hint",		Opt_compression_hint,
			 rbd_param_compression_hint),
	fsparam_flag	("exclusive",			Opt_exclusive),
	fsparam_flag	("lock_on_read",		Opt_lock_on_read),
	fsparam_u32	("lock_timeout",		Opt_lock_timeout),
@@ -867,6 +883,8 @@ struct rbd_options {
	bool	lock_on_read;
	bool	exclusive;
	bool	trim;

	u32 alloc_hint_flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
};

#define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
@@ -2253,7 +2271,8 @@ static void __rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
	    !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) {
		osd_req_op_alloc_hint_init(osd_req, which++,
					   rbd_dev->layout.object_size,
					   rbd_dev->layout.object_size);
					   rbd_dev->layout.object_size,
					   rbd_dev->opts->alloc_hint_flags);
	}

	if (rbd_obj_is_entire(obj_req))
@@ -6331,6 +6350,29 @@ static int rbd_parse_param(struct fs_parameter *param,
		pctx->spec->pool_ns = param->string;
		param->string = NULL;
		break;
	case Opt_compression_hint:
		switch (result.uint_32) {
		case Opt_compression_hint_none:
			opt->alloc_hint_flags &=
			    ~(CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE |
			      CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE);
			break;
		case Opt_compression_hint_compressible:
			opt->alloc_hint_flags |=
			    CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
			opt->alloc_hint_flags &=
			    ~CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
			break;
		case Opt_compression_hint_incompressible:
			opt->alloc_hint_flags |=
			    CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
			opt->alloc_hint_flags &=
			    ~CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
			break;
		default:
			BUG();
		}
		break;
	case Opt_read_only:
		opt->read_only = true;
		break;
+1 −1
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ struct rbd_image_header_ondisk {
	__le32 snap_count;
	__le32 reserved;
	__le64 snap_names_len;
	struct rbd_image_snap_ondisk snaps[0];
	struct rbd_image_snap_ondisk snaps[];
} __attribute__((packed));


+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
	export.o caps.o snap.o xattr.o quota.o io.o \
	mds_client.o mdsmap.o strings.o ceph_frag.o \
	debugfs.o util.o
	debugfs.o util.o metric.o

ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
	struct ceph_inode_info *ci = ceph_inode(inode);

	spin_lock(&ci->i_ceph_lock);
	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
	if (__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 0))
		set_cached_acl(inode, type, acl);
	else
		forget_cached_acl(inode, type);
+20 −0
Original line number Diff line number Diff line
@@ -11,10 +11,12 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/signal.h>
#include <linux/iversion.h>
#include <linux/ktime.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"
#include "metric.h"
#include <linux/ceph/osd_client.h>
#include <linux/ceph/striper.h>

@@ -216,6 +218,9 @@ static int ceph_sync_readpages(struct ceph_fs_client *fsc,
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
				 req->r_end_latency, rc);

	ceph_osdc_put_request(req);
	dout("readpages result %d\n", rc);
	return rc;
@@ -299,6 +304,7 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req)
{
	struct inode *inode = req->r_inode;
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_osd_data *osd_data;
	int rc = req->r_result <= 0 ? req->r_result : 0;
	int bytes = req->r_result >= 0 ? req->r_result : 0;
@@ -336,6 +342,10 @@ unlock:
		put_page(page);
		bytes -= PAGE_SIZE;
	}

	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
				 req->r_end_latency, rc);

	kfree(osd_data->pages);
}

@@ -643,6 +653,9 @@ static int ceph_sync_writepages(struct ceph_fs_client *fsc,
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
				  req->r_end_latency, rc);

	ceph_osdc_put_request(req);
	if (rc == 0)
		rc = len;
@@ -794,6 +807,9 @@ static void writepages_finish(struct ceph_osd_request *req)
		ceph_clear_error_write(ci);
	}

	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
				  req->r_end_latency, rc);

	/*
	 * We lost the cache cap, need to truncate the page before
	 * it is unlocked, otherwise we'd truncate it later in the
@@ -1852,6 +1868,10 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!err)
		err = ceph_osdc_wait_request(&fsc->client->osdc, req);

	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
				  req->r_end_latency, err);

out_put:
	ceph_osdc_put_request(req);
	if (err == -ECANCELED)
Loading