Commit 913fadc5 authored by Anna Schumaker
Browse files

NFS: Fix interrupted slots by sending a solo SEQUENCE operation



We used to do this before 3453d570, but the behavior was changed to better
handle the NFS4ERR_SEQ_MISORDERED error code. That commit fixed the slot
re-use case when the server doesn't receive the interrupted operation,
but if the server does receive the operation then it could still end up
replying to the client with mismatched operations from the reply cache.

We can fix this by sending a SEQUENCE to the server while recovering from
a SEQ_MISORDERED error when we detect that we are in an interrupted slot
situation.

Fixes: 3453d570 (NFSv4.1: Avoid false retries when RPC calls are interrupted)
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent af667527
Loading
Loading
Loading
Loading
+18 −2
Original line number Diff line number Diff line
@@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
	slot->seq_nr_last_acked = seqnr;
}

/*
 * nfs4_probe_sequence - start a SEQUENCE call on @slot and let go of it.
 * @client: NFS client handed through to _nfs41_proc_sequence()
 * @cred:   credential handed through to _nfs41_proc_sequence()
 * @slot:   session slot handed through to _nfs41_proc_sequence()
 *
 * Calls _nfs41_proc_sequence() with its last argument set to true
 * (NOTE(review): the meaning of that flag is defined by the callee, which
 * is not visible here -- confirm).  If the call hands back an error
 * pointer there is nothing to release and the failure is not reported to
 * the caller; otherwise the returned task reference is dropped with
 * rpc_put_task_async().
 */
static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred,
				struct nfs4_slot *slot)
{
	struct rpc_task *probe;

	probe = _nfs41_proc_sequence(client, cred, slot, true);
	if (IS_ERR(probe))
		return;

	rpc_put_task_async(probe);
}

static int nfs41_sequence_process(struct rpc_task *task,
		struct nfs4_sequence_res *res)
{
@@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
		goto out;

	session = slot->table->session;
	clp = session->clp;

	trace_nfs4_sequence_done(session, res);

@@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
		nfs4_slot_sequence_acked(slot, slot->seq_nr);
		/* Update the slot's sequence and clientid lease timer */
		slot->seq_done = 1;
		clp = session->clp;
		do_renew_lease(clp, res->sr_timestamp);
		/* Check sequence flags */
		nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags,
@@ -852,10 +860,18 @@ static int nfs41_sequence_process(struct rpc_task *task,
		/*
		 * Were one or more calls using this slot interrupted?
		 * If the server never received the request, then our
		 * transmitted slot sequence number may be too high.
		 * transmitted slot sequence number may be too high. However,
		 * if the server did receive the request then it might
		 * accidentally give us a reply with a mismatched operation.
		 * We can sort this out by sending a lone sequence operation
		 * to the server on the same slot.
		 */
		if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
			slot->seq_nr--;
			if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) {
				nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot);
				res->sr_slot = NULL;
			}
			goto retry_nowait;
		}
		/*