Commit 9990a79d authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-tls-add-a-TX-lock'



Jakub Kicinski says:

====================
net/tls: add a TX lock

Some time ago Pooja and Mallesham started reporting crashes with
an async accelerator. After trying to poke the existing logic into
shape I came to the conclusion that it can't be trusted, and to
preserve our sanity we should just add a lock around the TX side.

First patch removes the sk_write_pending checks from the write
space callbacks. Those don't seem to have a logical justification.

Patch 2 adds the TX lock and patch 3 associated test (which should
hang with current net).

Mallesham reports that even with these fixes applied the async
accelerator workload still occasionally hangs waiting for socket
memory. I suspect that's strictly related to the way async crypto
is integrated in TLS, so I think we should get these into net or
net-next and move from there.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 17fdd763 41098af5
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/skmsg.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>

@@ -269,6 +270,10 @@ struct tls_context {

	bool in_tcp_sendpages;
	bool pending_open_record_frags;

	struct mutex tx_lock; /* protects partially_sent_* fields and
			       * per-type TX fields
			       */
	unsigned long flags;

	/* cache cold stuff */
+9 −1
Original line number Diff line number Diff line
@@ -523,8 +523,10 @@ last_record:
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	int rc;

	mutex_lock(&tls_ctx->tx_lock);
	lock_sock(sk);

	if (unlikely(msg->msg_controllen)) {
@@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)

out:
	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
	return rc;
}

int tls_device_sendpage(struct sock *sk, struct page *page,
			int offset, size_t size, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct iov_iter	msg_iter;
	char *kaddr = kmap(page);
	struct kvec iov;
@@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
	if (flags & MSG_SENDPAGE_NOTLAST)
		flags |= MSG_MORE;

	mutex_lock(&tls_ctx->tx_lock);
	lock_sock(sk);

	if (flags & MSG_OOB) {
@@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,

out:
	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
	return rc;
}

@@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)

void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
{
	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
	if (tls_is_partially_sent_record(ctx)) {
		gfp_t sk_allocation = sk->sk_allocation;

		WARN_ON_ONCE(sk->sk_write_pending);

		sk->sk_allocation = GFP_ATOMIC;
		tls_push_partial_record(sk, ctx,
					MSG_DONTWAIT | MSG_NOSIGNAL |
+2 −0
Original line number Diff line number Diff line
@@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx)

	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
	mutex_destroy(&ctx->tx_lock);

	if (sk)
		kfree_rcu(ctx, rcu);
@@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
	if (!ctx)
		return NULL;

	mutex_init(&ctx->tx_lock);
	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	ctx->sk_proto = sk->sk_prot;
	return ctx;
+10 −20
Original line number Diff line number Diff line
@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
		return -ENOTSUPP;

	mutex_lock(&tls_ctx->tx_lock);
	lock_sock(sk);

	/* Wait till there is any pending write on socket */
	if (unlikely(sk->sk_write_pending)) {
		ret = wait_on_pending_writer(sk, &timeo);
		if (unlikely(ret))
			goto send_end;
	}

	if (unlikely(msg->msg_controllen)) {
		ret = tls_proccess_cmsg(sk, msg, &record_type);
		if (ret) {
@@ -1091,6 +1085,7 @@ send_end:
	ret = sk_stream_error(sk, msg->msg_flags, ret);

	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
	return copied ? copied : ret;
}

@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	/* Wait till there is any pending write on socket */
	if (unlikely(sk->sk_write_pending)) {
		ret = wait_on_pending_writer(sk, &timeo);
		if (unlikely(ret))
			goto sendpage_end;
	}

	/* Call the sk_stream functions to manage the sndbuf mem. */
	while (size > 0) {
		size_t copy, required_size;
@@ -1219,15 +1207,18 @@ sendpage_end:
int tls_sw_sendpage(struct sock *sk, struct page *page,
		    int offset, size_t size, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	int ret;

	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
		return -ENOTSUPP;

	mutex_lock(&tls_ctx->tx_lock);
	lock_sock(sk);
	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
	return ret;
}

@@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work)

	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		return;
	mutex_lock(&tls_ctx->tx_lock);
	lock_sock(sk);
	tls_tx_records(sk, -1);
	release_sock(sk);
	mutex_unlock(&tls_ctx->tx_lock);
}

void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
@@ -2180,13 +2173,10 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);

	/* Schedule the transmission if tx list is ready */
	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
		/* Schedule the transmission */
		if (!test_and_set_bit(BIT_TX_SCHEDULED,
				      &tx_ctx->tx_bitmask))
	if (is_tx_ready(tx_ctx) &&
	    !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
		schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
}

void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
{
+108 −0
Original line number Diff line number Diff line
@@ -898,6 +898,114 @@ TEST_F(tls, nonblocking)
	}
}

static void
test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
	       bool sendpg, unsigned int n_readers, unsigned int n_writers)
{
	const unsigned int n_children = n_readers + n_writers;
	const size_t data = 6 * 1000 * 1000;
	const size_t file_sz = data / 100;
	size_t read_bias, write_bias;
	int i, fd, child_id;
	char buf[file_sz];
	pid_t pid;

	/* Only allow multiples for simplicity */
	ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
	read_bias = n_writers / n_readers ?: 1;
	write_bias = n_readers / n_writers ?: 1;

	/* prep a file to send */
	fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
	ASSERT_GE(fd, 0);

	memset(buf, 0xac, file_sz);
	ASSERT_EQ(write(fd, buf, file_sz), file_sz);

	/* spawn children */
	for (child_id = 0; child_id < n_children; child_id++) {
		pid = fork();
		ASSERT_NE(pid, -1);
		if (!pid)
			break;
	}

	/* parent waits for all children */
	if (pid) {
		for (i = 0; i < n_children; i++) {
			int status;

			wait(&status);
			EXPECT_EQ(status, 0);
		}

		return;
	}

	/* Split threads for reading and writing */
	if (child_id < n_readers) {
		size_t left = data * read_bias;
		char rb[8001];

		while (left) {
			int res;

			res = recv(self->cfd, rb,
				   left > sizeof(rb) ? sizeof(rb) : left, 0);

			EXPECT_GE(res, 0);
			left -= res;
		}
	} else {
		size_t left = data * write_bias;

		while (left) {
			int res;

			ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
			if (sendpg)
				res = sendfile(self->fd, fd, NULL,
					       left > file_sz ? file_sz : left);
			else
				res = send(self->fd, buf,
					   left > file_sz ? file_sz : left, 0);

			EXPECT_GE(res, 0);
			left -= res;
		}
	}
}

TEST_F(tls, mutliproc_even)
{
	test_mutliproc(_metadata, self, false, 6, 6);
}

TEST_F(tls, mutliproc_readers)
{
	test_mutliproc(_metadata, self, false, 4, 12);
}

TEST_F(tls, mutliproc_writers)
{
	test_mutliproc(_metadata, self, false, 10, 2);
}

TEST_F(tls, mutliproc_sendpage_even)
{
	test_mutliproc(_metadata, self, true, 6, 6);
}

TEST_F(tls, mutliproc_sendpage_readers)
{
	test_mutliproc(_metadata, self, true, 4, 12);
}

TEST_F(tls, mutliproc_sendpage_writers)
{
	test_mutliproc(_metadata, self, true, 10, 2);
}

TEST_F(tls, control_msg)
{
	if (self->notls)