Commit 055923bf authored by Alexander Aring's avatar Alexander Aring Committed by David Teigland
Browse files

fs: dlm: implement tcp graceful shutdown



During my code inspection I saw there is no implementation of a graceful
shutdown for tcp. This patch will introduce a graceful shutdown for tcp
connections. The shutdown is implemented synchronized as
dlm_lowcomms_stop() is called to end all dlm communication. After shutdown
is done, a lot of flush and closing functionality will be called. However
I don't see a problem with that.

The waitqueue for synchronize the shutdown has a timeout of 10 seconds, if
timeout a force close will be exectued.

Signed-off-by: default avatarAlexander Aring <aahringo@redhat.com>
Signed-off-by: default avatarDavid Teigland <teigland@redhat.com>
parent ba3ab3ca
Loading
Loading
Loading
Loading
+72 −5
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@

/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)

struct cbuf {
	unsigned int base;
@@ -110,10 +111,12 @@ struct connection {
#define CF_CLOSE 6
#define CF_APP_LIMITED 7
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
	struct list_head writequeue;  /* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	int (*rx_action) (struct connection *);	/* What to do when active */
	void (*connect_action) (struct connection *);	/* What to do to connect */
	void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
	struct page *rx_page;
	struct cbuf cb;
	int retries;
@@ -122,6 +125,7 @@ struct connection {
	struct connection *othercon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)

@@ -218,6 +222,7 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
	spin_lock_init(&con->writequeue_lock);
	INIT_WORK(&con->swork, process_send_sockets);
	INIT_WORK(&con->rwork, process_recv_sockets);
	init_waitqueue_head(&con->shutdown_wait);

	/* Setup action pointers for child sockets */
	if (con->nodeid) {
@@ -619,6 +624,54 @@ static void close_connection(struct connection *con, bool and_other,
	clear_bit(CF_CLOSING, &con->flags);
}

static void shutdown_connection(struct connection *con)
{
	int ret;

	if (cancel_work_sync(&con->swork)) {
		log_print("canceled swork for node %d", con->nodeid);
		clear_bit(CF_WRITE_PENDING, &con->flags);
	}

	mutex_lock(&con->sock_mutex);
	/* nothing to shutdown */
	if (!con->sock) {
		mutex_unlock(&con->sock_mutex);
		return;
	}

	set_bit(CF_SHUTDOWN, &con->flags);
	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
	mutex_unlock(&con->sock_mutex);
	if (ret) {
		log_print("Connection %p failed to shutdown: %d will force close",
			  con, ret);
		goto force_close;
	} else {
		ret = wait_event_timeout(con->shutdown_wait,
					 !test_bit(CF_SHUTDOWN, &con->flags),
					 DLM_SHUTDOWN_WAIT_TIMEOUT);
		if (ret == 0) {
			log_print("Connection %p shutdown timed out, will force close",
				  con);
			goto force_close;
		}
	}

	return;

force_close:
	clear_bit(CF_SHUTDOWN, &con->flags);
	close_connection(con, false, true, true);
}

static void dlm_tcp_shutdown(struct connection *con)
{
	if (con->othercon)
		shutdown_connection(con->othercon);
	shutdown_connection(con);
}

/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
@@ -713,13 +766,18 @@ out_resched:
out_close:
	mutex_unlock(&con->sock_mutex);
	if (ret != -EAGAIN) {
		close_connection(con, false, true, false);
		/* Reconnect when there is something to send */
		close_connection(con, false, true, false);
		if (ret == 0) {
			log_print("connection %p got EOF from %d",
				  con, con->nodeid);
			/* handling for tcp shutdown */
			clear_bit(CF_SHUTDOWN, &con->flags);
			wake_up(&con->shutdown_wait);
			/* signal to breaking receive worker */
			ret = -1;
		}
	}
	/* Don't return success if we really got EOF */
	if (ret == 0)
		ret = -EAGAIN;

	return ret;
}

@@ -803,6 +861,7 @@ static int accept_from_sock(struct connection *con)
			spin_lock_init(&othercon->writequeue_lock);
			INIT_WORK(&othercon->swork, process_send_sockets);
			INIT_WORK(&othercon->rwork, process_recv_sockets);
			init_waitqueue_head(&othercon->shutdown_wait);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
		} else {
			/* close other sock con if we have something new */
@@ -1047,6 +1106,7 @@ static void tcp_connect_to_sock(struct connection *con)

	con->rx_action = receive_from_sock;
	con->connect_action = tcp_connect_to_sock;
	con->shutdown_action = dlm_tcp_shutdown;
	add_sock(sock, con);

	/* Bind to our cluster-known address connecting to avoid
@@ -1542,6 +1602,12 @@ static void stop_conn(struct connection *con)
	_stop_conn(con, true);
}

static void shutdown_conn(struct connection *con)
{
	if (con->shutdown_action)
		con->shutdown_action(con);
}

static void free_conn(struct connection *con)
{
	close_connection(con, true, true, true);
@@ -1593,6 +1659,7 @@ void dlm_lowcomms_stop(void)
	mutex_lock(&connections_lock);
	dlm_allow_conn = 0;
	mutex_unlock(&connections_lock);
	foreach_conn(shutdown_conn);
	work_flush();
	clean_writequeues();
	foreach_conn(free_conn);