Commit 5d47648f authored by Xiubo Li's avatar Xiubo Li Committed by Ilya Dryomov
Browse files

ceph: only choose one MDS who is in up:active state without laggy



Even the MDS is in up:active state, but it also maybe laggy. Here
will skip the laggy MDSs.

Signed-off-by: default avatarXiubo Li <xiubli@redhat.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 4d7ace02
Loading
Loading
Loading
Loading
+9 −4
Original line number Diff line number Diff line
@@ -974,14 +974,14 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
				     frag.frag, mds,
				     (int)r, frag.ndist);
				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
				    CEPH_MDS_STATE_ACTIVE)
				    CEPH_MDS_STATE_ACTIVE &&
				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
					goto out;
			}

			/* since this file/dir wasn't known to be
			 * replicated, then we want to look for the
			 * authoritative mds. */
			mode = USE_AUTH_MDS;
			if (frag.mds >= 0) {
				/* choose auth mds */
				mds = frag.mds;
@@ -989,10 +989,15 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
				     "frag %u mds%d (auth)\n",
				     inode, ceph_vinop(inode), frag.frag, mds);
				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
				    CEPH_MDS_STATE_ACTIVE)
				    CEPH_MDS_STATE_ACTIVE) {
					if (mode == USE_ANY_MDS &&
					    !ceph_mdsmap_is_laggy(mdsc->mdsmap,
								  mds))
						goto out;
				}
			}
			mode = USE_AUTH_MDS;
		}
	}

	spin_lock(&ci->i_ceph_lock);
+23 −7
Original line number Diff line number Diff line
@@ -13,22 +13,24 @@

#include "super.h"

#define CEPH_MDS_IS_READY(i, ignore_laggy) \
	(m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy))

/*
 * choose a random mds that is "up" (i.e. has a state > 0), or -1.
 */
int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
{
	int n = 0;
	int i, j;

	/* special case for one mds */
	/*
	 * special case for one mds, no matter it is laggy or
	 * not we have no choice
	 */
	if (1 == m->m_num_mds && m->m_info[0].state > 0)
		return 0;

	/* count */
	for (i = 0; i < m->m_num_mds; i++)
		if (m->m_info[i].state > 0)
		if (CEPH_MDS_IS_READY(i, ignore_laggy))
			n++;
	if (n == 0)
		return -1;
@@ -36,7 +38,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
	/* pick */
	n = prandom_u32() % n;
	for (j = 0, i = 0; i < m->m_num_mds; i++) {
		if (m->m_info[i].state > 0)
		if (CEPH_MDS_IS_READY(i, ignore_laggy))
			j++;
		if (j > n)
			break;
@@ -45,6 +47,20 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
	return i;
}

/*
 * choose a random mds that is "up" (i.e. has a state > 0), or -1.
 */
int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
{
	int mds;

	mds = __mdsmap_get_random_mds(m, false);
	if (mds == m->m_num_mds || mds == -1)
		mds = __mdsmap_get_random_mds(m, true);

	return mds == m->m_num_mds ? -1 : mds;
}

#define __decode_and_drop_type(p, end, type, bad)		\
	do {							\
		if (*p + sizeof(type) > end)			\