Commit a0d93e32 authored by Yan, Zheng's avatar Yan, Zheng Committed by Ilya Dryomov
Browse files

ceph: remove delay check logic from ceph_check_caps()



__ceph_caps_file_wanted() already checks 'caps_wanted_delay_min' and
'caps_wanted_delay_max'. There is no need to duplicate the logic in
ceph_check_caps() and __send_cap()

Signed-off-by: default avatar"Yan, Zheng" <zyan@redhat.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 719a2514
Loading
Loading
Loading
Loading
+37 −111
Original line number Diff line number Diff line
@@ -490,13 +490,10 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
{
	struct ceph_mount_options *opt = mdsc->fsc->mount_options;

	ci->i_hold_caps_min = round_jiffies(jiffies +
					    opt->caps_wanted_delay_min * HZ);
	ci->i_hold_caps_max = round_jiffies(jiffies +
					    opt->caps_wanted_delay_max * HZ);
	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
	dout("__cap_set_timeouts %p %lu\n", &ci->vfs_inode,
	     ci->i_hold_caps_max - jiffies);
}

/*
@@ -508,8 +505,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 *    -> we take mdsc->cap_delay_lock
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				struct ceph_inode_info *ci,
				bool set_timeout)
				struct ceph_inode_info *ci)
{
	dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->vfs_inode,
	     ci->i_ceph_flags, ci->i_hold_caps_max);
@@ -520,7 +516,6 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				goto no_change;
			list_del_init(&ci->i_cap_delay_list);
		}
		if (set_timeout)
		__cap_set_timeouts(mdsc, ci);
		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
no_change:
@@ -733,7 +728,7 @@ void ceph_add_cap(struct inode *inode,
		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
		     ceph_cap_string(issued), ceph_cap_string(wanted),
		     ceph_cap_string(actual_wanted));
		__cap_delay_requeue(mdsc, ci, true);
		__cap_delay_requeue(mdsc, ci);
	}

	if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1342,7 +1337,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
	struct cap_msg_args arg;
	int held, revoking;
	int wake = 0;
	int delayed = 0;
	int ret;

	/* Don't send anything if it's still being created. Return delayed */
@@ -1362,28 +1356,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
	     ceph_cap_string(revoking));
	BUG_ON((retain & CEPH_CAP_PIN) == 0);

	arg.session = cap->session;

	/* don't release wanted unless we've waited a bit. */
	if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
	    time_before(jiffies, ci->i_hold_caps_min)) {
		dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
		     ceph_cap_string(cap->issued),
		     ceph_cap_string(cap->issued & retain),
		     ceph_cap_string(cap->mds_wanted),
		     ceph_cap_string(want));
		want |= cap->mds_wanted;
		retain |= cap->issued;
		delayed = 1;
	}
	ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
	if (want & ~cap->mds_wanted) {
		/* user space may open/close single file frequently.
		 * This avoids droping mds_wanted immediately after
		 * requesting new mds_wanted.
		 */
		__cap_set_timeouts(mdsc, ci);
	}
	ci->i_ceph_flags &= ~CEPH_I_FLUSH;

	cap->issued &= retain;  /* drop bits we don't want */
	if (cap->implemented & ~cap->issued) {
@@ -1398,6 +1371,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
	cap->implemented &= cap->issued | used;
	cap->mds_wanted = want;

	arg.session = cap->session;
	arg.ino = ceph_vino(inode).ino;
	arg.cid = cap->cap_id;
	arg.follows = flushing ? ci->i_head_snapc->seq : 0;
@@ -1458,14 +1432,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,

	ret = send_cap_msg(&arg);
	if (ret < 0) {
		dout("error sending cap msg, must requeue %p\n", inode);
		delayed = 1;
		pr_err("error sending cap msg, ino (%llx.%llx) "
		       "flushing %s tid %llu, requeue\n",
		       ceph_vinop(inode), ceph_cap_string(flushing),
		       flush_tid);
		spin_lock(&ci->i_ceph_lock);
		__cap_delay_requeue(mdsc, ci);
		spin_unlock(&ci->i_ceph_lock);
	}

	if (wake)
		wake_up_all(&ci->i_cap_wq);

	return delayed;
	return ret;
}

static inline int __send_flush_snap(struct inode *inode,
@@ -1731,7 +1710,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
	    (mask & CEPH_CAP_FILE_BUFFER))
		dirty |= I_DIRTY_DATASYNC;
	__cap_delay_requeue(mdsc, ci, true);
	__cap_delay_requeue(mdsc, ci);
	return dirty;
}

@@ -1883,8 +1862,6 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
 * versus held caps.  Release, flush, ack revoked caps to mds as
 * appropriate.
 *
 *  CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
 *    cap release further.
 *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
 *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
 *    further delay.
@@ -1903,17 +1880,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
	int mds = -1;   /* keep track of how far we've gone through i_caps list
			   to avoid an infinite loop on retry */
	struct rb_node *p;
	int delayed = 0, sent = 0;
	bool no_delay = flags & CHECK_CAPS_NODELAY;
	bool queue_invalidate = false;
	bool tried_invalidate = false;

	/* if we are unmounting, flush any unused caps immediately. */
	if (mdsc->stopping)
		no_delay = true;

	spin_lock(&ci->i_ceph_lock);

	if (ci->i_ceph_flags & CEPH_I_FLUSH)
		flags |= CHECK_CAPS_FLUSH;

@@ -1960,14 +1930,13 @@ retry_locked:
	}

	dout("check_caps %p file_want %s used %s dirty %s flushing %s"
	     " issued %s revoking %s retain %s %s%s%s\n", inode,
	     " issued %s revoking %s retain %s %s%s\n", inode,
	     ceph_cap_string(file_wanted),
	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
	     ceph_cap_string(ci->i_flushing_caps),
	     ceph_cap_string(issued), ceph_cap_string(revoking),
	     ceph_cap_string(retain),
	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
	     (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");

	/*
@@ -1975,7 +1944,7 @@ retry_locked:
	 * have cached pages, but don't want them, then try to invalidate.
	 * If we fail, it's because pages are locked.... try again later.
	 */
	if ((!no_delay || mdsc->stopping) &&
	if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) &&
	    S_ISREG(inode->i_mode) &&
	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
	    inode->i_data.nrpages &&		/* have cached pages */
@@ -2055,21 +2024,6 @@ retry_locked:
		if ((cap->issued & ~retain) == 0)
			continue;     /* nope, all good */

		if (no_delay)
			goto ack;

		/* delay? */
		if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
		    time_before(jiffies, ci->i_hold_caps_max)) {
			dout(" delaying issued %s -> %s, wanted %s -> %s\n",
			     ceph_cap_string(cap->issued),
			     ceph_cap_string(cap->issued & retain),
			     ceph_cap_string(cap->mds_wanted),
			     ceph_cap_string(want));
			delayed++;
			continue;
		}

ack:
		if (session && session != cap->session) {
			dout("oops, wrong session %p mutex\n", session);
@@ -2130,25 +2084,19 @@ ack:
		}

		mds = cap->mds;  /* remember mds, so we don't repeat */
		sent++;

		/* __send_cap drops i_ceph_lock */
		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0,
				cap_used, want, retain, flushing,
				flush_tid, oldest_flush_tid);
		__send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0, cap_used, want,
			   retain, flushing, flush_tid, oldest_flush_tid);
		goto retry; /* retake i_ceph_lock and restart our cap scan. */
	}

	if (list_empty(&ci->i_cap_delay_list)) {
	    if (delayed) {
		    /* Reschedule delayed caps release if we delayed anything */
		    __cap_delay_requeue(mdsc, ci, false);
	    } else if (__ceph_is_any_real_caps(ci) &&
	/* periodically re-calculate caps wanted by open files */
	if (__ceph_is_any_real_caps(ci) &&
	    list_empty(&ci->i_cap_delay_list) &&
	    (file_wanted & ~CEPH_CAP_PIN) &&
	    !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
		    /* periodically re-calculate caps wanted by open files */
		    __cap_delay_requeue(mdsc, ci, true);
	    }
		__cap_delay_requeue(mdsc, ci);
	}

	spin_unlock(&ci->i_ceph_lock);
@@ -2178,7 +2126,6 @@ retry:
retry_locked:
	if (ci->i_dirty_caps && ci->i_auth_cap) {
		struct ceph_cap *cap = ci->i_auth_cap;
		int delayed;

		if (session != cap->session) {
			spin_unlock(&ci->i_ceph_lock);
@@ -2207,18 +2154,10 @@ retry_locked:
						 &oldest_flush_tid);

		/* __send_cap drops i_ceph_lock */
		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
				     CEPH_CLIENT_CAPS_SYNC,
				     __ceph_caps_used(ci),
				     __ceph_caps_wanted(ci),
		__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
			   __ceph_caps_used(ci), __ceph_caps_wanted(ci),
			   (cap->issued | cap->implemented),
			   flushing, flush_tid, oldest_flush_tid);

		if (delayed) {
			spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci, true);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else {
		if (!list_empty(&ci->i_cap_flush_list)) {
			struct ceph_cap_flush *cf =
@@ -2422,22 +2361,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
		if (cf->caps) {
			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
			ci->i_ceph_flags |= CEPH_I_NODELAY;

			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
			__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
					 (cf->tid < last_snap_flush ?
					  CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
					  __ceph_caps_used(ci),
					  __ceph_caps_wanted(ci),
					  (cap->issued | cap->implemented),
					  cf->caps, cf->tid, oldest_flush_tid);
			if (ret) {
				pr_err("kick_flushing_caps: error sending "
					"cap flush, ino (%llx.%llx) "
					"tid %llu flushing %s\n",
					ceph_vinop(inode), cf->tid,
					ceph_cap_string(cf->caps));
			}
		} else {
			struct ceph_cap_snap *capsnap =
					container_of(cf, struct ceph_cap_snap,
@@ -3059,7 +2989,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
	     last ? " last" : "", put ? " put" : "");

	if (last && !flushsnaps)
	if (last)
		ceph_check_caps(ci, 0, NULL);
	else if (flushsnaps)
		ceph_flush_snaps(ci, NULL);
@@ -3478,10 +3408,10 @@ static void handle_cap_grant(struct inode *inode,
		wake_up_all(&ci->i_cap_wq);

	if (check_caps == 1)
		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL,
				session);
	else if (check_caps == 2)
		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
		ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session);
	else
		mutex_unlock(&session->s_mutex);
}
@@ -4156,7 +4086,6 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
	struct inode *inode;
	struct ceph_inode_info *ci;
	int flags = CHECK_CAPS_NODELAY;

	dout("check_delayed_caps\n");
	while (1) {
@@ -4176,7 +4105,7 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)

		if (inode) {
			dout("check_delayed_caps on %p\n", inode);
			ceph_check_caps(ci, flags, NULL);
			ceph_check_caps(ci, 0, NULL);
			/* avoid calling iput_final() in tick thread */
			ceph_async_iput(inode);
		}
@@ -4201,7 +4130,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
		ihold(inode);
		dout("flush_dirty_caps %p\n", inode);
		spin_unlock(&mdsc->cap_dirty_lock);
		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
		ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
		iput(inode);
		spin_lock(&mdsc->cap_dirty_lock);
	}
@@ -4221,7 +4150,7 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
	if (fmode &&
	    __ceph_is_any_real_caps(ci) &&
	    list_empty(&ci->i_cap_delay_list))
		__cap_delay_requeue(mdsc, ci, true);
		__cap_delay_requeue(mdsc, ci);
}

void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
@@ -4280,7 +4209,6 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
	if (inode->i_nlink == 1) {
		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);

		ci->i_ceph_flags |= CEPH_I_NODELAY;
		if (__ceph_caps_dirty(ci)) {
			struct ceph_mds_client *mdsc =
				ceph_inode_to_client(inode)->mdsc;
@@ -4336,8 +4264,6 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
		if (force || (cap->issued & drop)) {
			if (cap->issued & drop) {
				int wanted = __ceph_caps_wanted(ci);
				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
					wanted |= cap->mds_wanted;
				dout("encode_inode_release %p cap %p "
				     "%s -> %s, wanted %s -> %s\n", inode, cap,
				     ceph_cap_string(cap->issued),
+4 −9
Original line number Diff line number Diff line
@@ -1826,7 +1826,7 @@ retry_snap:
		if (dirty)
			__mark_inode_dirty(inode, dirty);
		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
			ceph_check_caps(ci, 0, NULL);
	}

	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -2427,15 +2427,10 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
	inode_inc_iversion_raw(dst_inode);

	if (dst_off > size) {
		int caps_flags = 0;

		/* Let the MDS know about dst file size change */
		if (ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
			caps_flags |= CHECK_CAPS_NODELAY;
		if (ceph_inode_set_size(dst_inode, dst_off))
			caps_flags |= CHECK_CAPS_AUTHONLY;
		if (caps_flags)
			ceph_check_caps(dst_ci, caps_flags, NULL);
		if (ceph_inode_set_size(dst_inode, dst_off) ||
		    ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
			ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY, NULL);
	}
	/* Mark Fw dirty */
	spin_lock(&dst_ci->i_ceph_lock);
+0 −1
Original line number Diff line number Diff line
@@ -472,7 +472,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
	ci->i_prealloc_cap_flush = NULL;
	INIT_LIST_HEAD(&ci->i_cap_flush_list);
	init_waitqueue_head(&ci->i_cap_wq);
	ci->i_hold_caps_min = 0;
	ci->i_hold_caps_max = 0;
	INIT_LIST_HEAD(&ci->i_cap_delay_list);
	INIT_LIST_HEAD(&ci->i_cap_snaps);
+3 −5
Original line number Diff line number Diff line
@@ -173,9 +173,9 @@ struct ceph_cap {
	struct list_head caps_item;
};

#define CHECK_CAPS_NODELAY    1  /* do not delay any further */
#define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
#define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */
#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */

struct ceph_cap_flush {
	u64 tid;
@@ -357,7 +357,6 @@ struct ceph_inode_info {
	struct ceph_cap_flush *i_prealloc_cap_flush;
	struct list_head i_cap_flush_list;
	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
	unsigned long i_hold_caps_min; /* jiffies */
	unsigned long i_hold_caps_max; /* jiffies */
	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
	struct ceph_cap_reservation i_cap_migration_resv;
@@ -518,7 +517,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 * Ceph inode.
 */
#define CEPH_I_DIR_ORDERED	(1 << 0)  /* dentries in dir are ordered */
#define CEPH_I_NODELAY		(1 << 1)  /* do not delay cap release */
#define CEPH_I_FLUSH		(1 << 2)  /* do not delay flush of dirty metadata */
#define CEPH_I_POOL_PERM	(1 << 3)  /* pool rd/wr bits are valid */
#define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */