Commit f236f69b authored by Lai Siyao's avatar Lai Siyao Committed by Greg Kroah-Hartman
Browse files

lustre/llite: simplify dentry revalidate



Lustre client dentry validation is protected by LDLM lock, so
any time a dentry is found, it's valid and no need to revalidate
from MDS, and even it does, there is race that it may be
invalidated after revalidation is finished.

Signed-off-by: default avatarLai Siyao <lai.siyao@intel.com>
Reviewed-on: http://review.whamcloud.com/7475
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3544


Reviewed-by: default avatarPeng Tao <bergwolf@gmail.com>
Reviewed-by: default avatarBob Glossman <bob.glossman@intel.com>
Reviewed-by: default avatarJohn L. Hammond <john.hammond@intel.com>
Reviewed-by: default avatarOleg Drokin <oleg.drokin@intel.com>
Signed-off-by: default avatarOleg Drokin <oleg.drokin@intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 63d42578
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2112,7 +2112,7 @@ extern void lustre_swab_generic_32s (__u32 *val);
#define DISP_LOOKUP_POS      0x00000008
#define DISP_OPEN_CREATE     0x00000010
#define DISP_OPEN_OPEN       0x00000020
#define DISP_ENQ_COMPLETE    0x00400000
#define DISP_ENQ_COMPLETE    0x00400000		/* obsolete and unused */
#define DISP_ENQ_OPEN_REF    0x00800000
#define DISP_ENQ_CREATE_REF  0x01000000
#define DISP_OPEN_LOCK       0x02000000
+23 −267
Original line number Diff line number Diff line
@@ -241,9 +241,6 @@ void ll_intent_release(struct lookup_intent *it)
		 ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */
	if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
		ptlrpc_req_finished(it->d.lustre.it_data);
	if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
						    * to lookup */
		ptlrpc_req_finished(it->d.lustre.it_data);

	it->d.lustre.it_disposition = 0;
	it->d.lustre.it_data = NULL;
@@ -328,262 +325,32 @@ void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)

}

int ll_revalidate_it(struct dentry *de, int lookup_flags,
		     struct lookup_intent *it)
static int ll_revalidate_dentry(struct dentry *dentry,
				unsigned int lookup_flags)
{
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
	struct obd_export *exp;
	struct inode *parent = de->d_parent->d_inode;
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
	       LL_IT2STR(it));

	LASSERT(de != de->d_sb->s_root);

	if (de->d_inode == NULL) {
		__u64 ibits;

		/* We can only use negative dentries if this is stat or lookup,
		   for opens and stuff we do need to query server. */
		/* If there is IT_CREAT in intent op set, then we must throw
		   away this negative dentry and actually do the request to
		   kernel to create whatever needs to be created (if possible)*/
		if (it && (it->it_op & IT_CREAT))
			return 0;

		if (d_lustre_invalid(de))
			return 0;

		ibits = MDS_INODELOCK_UPDATE;
		rc = ll_have_md_lock(parent, &ibits, LCK_MINMODE);
		GOTO(out_sa, rc);
	}

	/* Never execute intents for mount points.
	 * Attributes will be fixed up in ll_inode_revalidate_it */
	if (d_mountpoint(de))
		GOTO(out_sa, rc = 1);

	exp = ll_i2mdexp(de->d_inode);

	OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
	ll_frob_intent(&it, &lookup_it);
	LASSERT(it);

	if (it->it_op == IT_LOOKUP && !d_lustre_invalid(de))
		return 1;

	if (it->it_op == IT_OPEN) {
		struct inode *inode = de->d_inode;
		struct ll_inode_info *lli = ll_i2info(inode);
		struct obd_client_handle **och_p;
		__u64 ibits;
	struct inode *dir = dentry->d_parent->d_inode;

	/*
		 * We used to check for MDS_INODELOCK_OPEN here, but in fact
		 * just having LOOKUP lock is enough to justify inode is the
		 * same. And if inode is the same and we have suitable
		 * openhandle, then there is no point in doing another OPEN RPC
		 * just to throw away newly received openhandle.  There are no
		 * security implications too, if file owner or access mode is
		 * change, LOOKUP lock is revoked.
	 * if open&create is set, talk to MDS to make sure file is created if
	 * necessary, because we can't do this in ->open() later since that's
	 * called on an inode. return 0 here to let lookup to handle this.
	 */
	if ((lookup_flags & (LOOKUP_OPEN | LOOKUP_CREATE)) ==
	    (LOOKUP_OPEN | LOOKUP_CREATE))
		return 0;


		if (it->it_flags & FMODE_WRITE)
			och_p = &lli->lli_mds_write_och;
		else if (it->it_flags & FMODE_EXEC)
			och_p = &lli->lli_mds_exec_och;
		else
			och_p = &lli->lli_mds_read_och;

		/* Check for the proper lock. */
		ibits = MDS_INODELOCK_LOOKUP;
		if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE))
			goto do_lock;
		mutex_lock(&lli->lli_och_mutex);
		if (*och_p) { /* Everything is open already, do nothing */
			/* Originally it was idea to do not let them steal our
			 * open handle from under us by (*och_usecount)++ here.
			 * But in case we have the handle, but we cannot use it
			 * due to later checks (e.g. O_CREAT|O_EXCL flags set),
			 * nobody would decrement counter increased here. So we
			 * just hope the lock won't be invalidated in between.
			 * But if it would be, we'll reopen the open request to
			 * MDS later during file open path.
			 */
			mutex_unlock(&lli->lli_och_mutex);
	if (lookup_flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
		return 1;
		}
		mutex_unlock(&lli->lli_och_mutex);
	}

	if (it->it_op == IT_GETATTR) {
		rc = ll_statahead_enter(parent, &de, 0);
		if (rc == 1)
			goto mark;
		else if (rc != -EAGAIN && rc != 0)
			GOTO(out, rc = 0);
	}

do_lock:
	op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
				     de->d_name.name, de->d_name.len,
				     0, LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	if (!IS_POSIXACL(parent) || !exp_connect_umask(exp))
		it->it_create_mode &= ~current_umask();
	it->it_create_mode |= M_CHECK_STALE;
	rc = md_intent_lock(exp, op_data, NULL, 0, it,
			    lookup_flags,
			    &req, ll_md_blocking_ast, 0);
	it->it_create_mode &= ~M_CHECK_STALE;
	ll_finish_md_op_data(op_data);

	/* If req is NULL, then md_intent_lock only tried to do a lock match;
	 * if all was well, it will return 1 if it found locks, 0 otherwise. */
	if (req == NULL && rc >= 0) {
		if (!rc)
			goto do_lookup;
		GOTO(out, rc);
	}

	if (rc < 0) {
		if (rc != -ESTALE) {
			CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
			       "%d\n", rc, it->d.lustre.it_status);
		}
		GOTO(out, rc = 0);
	}

revalidate_finish:
	rc = ll_revalidate_it_finish(req, it, de);
	if (rc != 0) {
		if (rc != -ESTALE && rc != -ENOENT)
			ll_intent_release(it);
		GOTO(out, rc = 0);
	}

	if ((it->it_op & IT_OPEN) && de->d_inode &&
	    !S_ISREG(de->d_inode->i_mode) &&
	    !S_ISDIR(de->d_inode->i_mode)) {
		ll_release_openhandle(de, it);
	}
	rc = 1;

out:
	/* We do not free request as it may be reused during following lookup
	 * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
	 * be freed in ll_lookup_it or in ll_intent_release. But if
	 * request was not completed, we need to free it. (bug 5154, 9903) */
	if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
		ptlrpc_req_finished(req);
	if (rc == 0) {
		/* mdt may grant layout lock for the newly created file, so
		 * release the lock to avoid leaking */
		ll_intent_drop_lock(it);
		ll_invalidate_aliases(de->d_inode);
	} else {
		__u64 bits = 0;
		__u64 matched_bits = 0;

		CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
		       "inode %p refc %d\n", de->d_name.len,
		       de->d_name.name, de, de->d_parent, de->d_inode,
		       d_count(de));

		ll_set_lock_data(exp, de->d_inode, it, &bits);

		/* Note: We have to match both LOOKUP and PERM lock
		 * here to make sure the dentry is valid and no one
		 * changing the permission.
		 * But if the client connects < 2.4 server, which will
		 * only grant LOOKUP lock, so we can only Match LOOKUP
		 * lock for old server */
		if (exp_connect_flags(ll_i2mdexp(de->d_inode)) &&
							OBD_CONNECT_LVB_TYPE)
			matched_bits =
				MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
		else
			matched_bits = MDS_INODELOCK_LOOKUP;

		if (((bits & matched_bits) == matched_bits) &&
		    d_lustre_invalid(de))
			d_lustre_revalidate(de);
		ll_lookup_finish_locks(it, de);
	}

mark:
	if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
		ll_statahead_mark(parent, de);
	return rc;

	/*
	 * This part is here to combat evil-evil race in real_lookup on 2.6
	 * kernels.  The race details are: We enter do_lookup() looking for some
	 * name, there is nothing in dcache for this name yet and d_lookup()
	 * returns NULL.  We proceed to real_lookup(), and while we do this,
	 * another process does open on the same file we looking up (most simple
	 * reproducer), open succeeds and the dentry is added. Now back to
	 * us. In real_lookup() we do d_lookup() again and suddenly find the
	 * dentry, so we call d_revalidate on it, but there is no lock, so
	 * without this code we would return 0, but unpatched real_lookup just
	 * returns -ENOENT in such a case instead of retrying the lookup. Once
	 * this is dealt with in real_lookup(), all of this ugly mess can go and
	 * we can just check locks in ->d_revalidate without doing any RPCs
	 * ever.
	 */
do_lookup:
	if (it != &lookup_it) {
		/* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
		if (it->it_op == IT_GETATTR)
			lookup_it.it_op = IT_GETATTR;
		ll_lookup_finish_locks(it, de);
		it = &lookup_it;
	}
	if (d_need_statahead(dir, dentry) <= 0)
		return 1;

	/* Do real lookup here. */
	op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
				     de->d_name.len, 0, (it->it_op & IT_CREAT ?
							 LUSTRE_OPC_CREATE :
							 LUSTRE_OPC_ANY), NULL);
	if (IS_ERR(op_data))
		return PTR_ERR(op_data);

	rc = md_intent_lock(exp, op_data, NULL, 0,  it, 0, &req,
			    ll_md_blocking_ast, 0);
	if (rc >= 0) {
		struct mdt_body *mdt_body;
		struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
		mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

		if (de->d_inode)
			fid = *ll_inode2fid(de->d_inode);

		/* see if we got same inode, if not - return error */
		if (lu_fid_eq(&fid, &mdt_body->fid1)) {
			ll_finish_md_op_data(op_data);
			op_data = NULL;
			goto revalidate_finish;
		}
		ll_intent_release(it);
	}
	ll_finish_md_op_data(op_data);
	GOTO(out, rc = 0);
	if (lookup_flags & LOOKUP_RCU)
		return -ECHILD;

out_sa:
	/*
	 * For rc == 1 case, should not return directly to prevent losing
	 * statahead windows; for rc == 0 case, the "lookup" will be done later.
	 */
	if (it != NULL && it->it_op == IT_GETATTR && rc == 1)
		ll_statahead_enter(parent, &de, 1);
	goto mark;
	do_statahead_enter(dir, &dentry, dentry->d_inode == NULL);
	ll_statahead_mark(dir, dentry);
	return 1;
}

/*
@@ -591,24 +358,13 @@ out_sa:
 */
int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
{
	struct inode *parent = dentry->d_parent->d_inode;
	int unplug = 0;
	int rc;

	CDEBUG(D_VFSTRACE, "VFS Op:name=%s, flags=%u\n",
	       dentry->d_name.name, flags);

	if (!(flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) &&
	    ll_need_statahead(parent, dentry) > 0) {
		if (flags & LOOKUP_RCU)
			return -ECHILD;

		if (dentry->d_inode == NULL)
			unplug = 1;
		do_statahead_enter(parent, &dentry, unplug);
		ll_statahead_mark(parent, dentry);
	}

	return 1;
	rc = ll_revalidate_dentry(dentry, flags);
	return rc;
}


+2 −6
Original line number Diff line number Diff line
@@ -446,8 +446,7 @@ static int ll_intent_file_open(struct file *file, void *lmm,
				 itp, NULL);

out:
	ptlrpc_req_finished(itp->d.lustre.it_data);
	it_clear_disposition(itp, DISP_ENQ_COMPLETE);
	ptlrpc_req_finished(req);
	ll_intent_drop_lock(itp);

	return rc;
@@ -815,10 +814,7 @@ struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
				LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	if (req != NULL) {
	ptlrpc_req_finished(req);
		it_clear_disposition(&it, DISP_ENQ_COMPLETE);
	}
	if (rc < 0)
		GOTO(out_release_it, rc);

+2 −2
Original line number Diff line number Diff line
@@ -1309,7 +1309,7 @@ ll_statahead_mark(struct inode *dir, struct dentry *dentry)
}

static inline int
ll_need_statahead(struct inode *dir, struct dentry *dentryp)
d_need_statahead(struct inode *dir, struct dentry *dentryp)
{
	struct ll_inode_info  *lli;
	struct ll_dentry_data *ldd;
@@ -1354,7 +1354,7 @@ ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
{
	int ret;

	ret = ll_need_statahead(dir, *dentryp);
	ret = d_need_statahead(dir, *dentryp);
	if (ret <= 0)
		return ret;

+0 −1
Original line number Diff line number Diff line
@@ -119,7 +119,6 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
	CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
	       PFID(&body->fid1), tgt->ltd_idx);

	it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
			    flags, &req, cb_blocking, extra_lock_flags);
	if (rc)
Loading