Commit 599377ac authored by Philipp Reisner, committed by Jens Axboe
Browse files

drbd: Avoid NetworkFailure state during disconnect



Disconnecting is a cluster-wide state change. If the peer node agrees
to the state transition, it acknowledges this on the meta-data connection
and closes both sockets.

If the node that initiated the state change sees the data socket being
closed before it receives the P_STATE_CHG_REPLY packet, it used to go
into one of the network failure states.

At least with the fencing option set to something other than "dont-care",
the unclean shutdown of the connection causes a short IO freeze or
a fence operation.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c12a3d8c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -857,6 +857,7 @@ enum {
				 * so shrink_page_list() would not recurse into,
				 * and potentially deadlock on, this drbd worker.
				 */
	DISCONNECT_SENT,	/* Currently the last bit in this 32bit word */
};

struct drbd_bitmap; /* opaque for drbd_conf */
+3 −0
Original line number Diff line number Diff line
@@ -659,6 +659,9 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
			goto abort;
		}

		if (mask.conn == C_MASK && val.conn == C_DISCONNECTING)
			set_bit(DISCONNECT_SENT, &mdev->flags);

		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

+20 −1
Original line number Diff line number Diff line
@@ -534,7 +534,6 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
				dev_err(DEV, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			dev_info(DEV, "sock was shut down by peer\n");
			break;
		} else	{
			/* signal came in, or peer/link went down,
@@ -547,9 +546,21 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)

	set_fs(oldfs);

	if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &mdev->flags)) {
			long t; /* time_left */
			t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED,
					       mdev->net_conf->ping_timeo * HZ/10);
			if (t)
				goto out;
		}
		dev_info(DEV, "sock was shut down by peer\n");
	}

	if (rv != size)
		drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));

out:
	return rv;
}

@@ -760,6 +771,7 @@ static int drbd_connect(struct drbd_conf *mdev)

	D_ASSERT(!mdev->data.socket);

	clear_bit(DISCONNECT_SENT, &mdev->flags);
	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

@@ -4680,6 +4692,13 @@ int drbd_asender(struct drbd_thread *thi)
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &mdev->flags)) {
				long t; /* time_left */
				t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED,
						       mdev->net_conf->ping_timeo * HZ/10);
				if (t)
					break;
			}
			dev_err(DEV, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {