Commit 578de2f3 authored by David S. Miller

Merge branch 'net-tls-minor-micro-optimizations'



Jakub Kicinski says:

====================
net/tls: minor micro optimizations

This set brings a number of minor code changes from my tree which
don't have a noticeable impact on performance but seem reasonable
nonetheless.

First, the sk_msg_sg copy array is converted to a bitmap; zeroing
that structure takes a lot of time, hence we should try to keep it
small.

Next, two conditions are marked as unlikely; GCC seemed to have had
little trouble correctly reasoning about those.

Patch 4 adds parameters to tls_device_decrypted() to avoid
walking structures, as all callers already have the relevant
pointers.

Lastly, two boolean members of TLS context structures are
converted to a bitfield.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8211fbfa bc76e5bb
+8 −4
@@ -28,13 +28,14 @@ struct sk_msg_sg {
	u32				end;
	u32				size;
	u32				copybreak;
-	bool				copy[MAX_MSG_FRAGS];
+	unsigned long			copy;
	/* The extra element is used for chaining the front and sections when
	 * the list becomes partitioned (e.g. end < start). The crypto APIs
	 * require the chaining.
	 */
	struct scatterlist		data[MAX_MSG_FRAGS + 1];
};
+static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS);

/* UAPI in filter.c depends on struct sk_msg_sg being first element. */
struct sk_msg {
@@ -227,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
{
	struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);

-	if (msg->sg.copy[msg->sg.start]) {
+	if (test_bit(msg->sg.start, &msg->sg.copy)) {
		msg->data = NULL;
		msg->data_end = NULL;
	} else {
@@ -246,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
	sg_set_page(sge, page, len, offset);
	sg_unmark_end(sge);

-	msg->sg.copy[msg->sg.end] = true;
+	__set_bit(msg->sg.end, &msg->sg.copy);
	msg->sg.size += len;
	sk_msg_iter_next(msg, end);
}
@@ -254,7 +255,10 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
{
	do {
-		msg->sg.copy[i] = copy_state;
+		if (copy_state)
+			__set_bit(i, &msg->sg.copy);
+		else
+			__clear_bit(i, &msg->sg.copy);
		sk_msg_iter_var_next(i);
		if (i == msg->sg.end)
			break;
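
The bitmap conversion above replaces a per-fragment bool array with a
single unsigned long, so clearing the copy state zeroes one word instead
of a whole array, and the new static_assert guarantees every fragment
index fits in that word. A minimal userspace sketch of the size
difference, assuming a hypothetical MAX_MSG_FRAGS of 17 (the kernel
derives the real value from MAX_SKB_FRAGS):

/* Userspace sketch, not kernel code: memory that must be zeroed for
 * the old bool-array copy state vs. the new single-word bitmap. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_MSG_FRAGS 17	/* hypothetical value for illustration */

struct old_sg_state { bool copy[MAX_MSG_FRAGS]; };	/* 17 bytes */
struct new_sg_state { unsigned long copy; };		/* 8 bytes on 64-bit */

int main(void)
{
	printf("old: %zu bytes, new: %zu bytes\n",
	       sizeof(struct old_sg_state), sizeof(struct new_sg_state));
	return 0;
}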
+8 −5
@@ -136,7 +136,7 @@ struct tls_sw_context_tx {
	struct list_head tx_list;
	atomic_t encrypt_pending;
	int async_notify;
-	int async_capable;
+	u8 async_capable:1;

#define BIT_TX_SCHEDULED	0
#define BIT_TX_CLOSING		1
@@ -152,8 +152,8 @@ struct tls_sw_context_rx {

	struct sk_buff *recv_pkt;
	u8 control;
-	int async_capable;
-	bool decrypted;
+	u8 async_capable:1;
+	u8 decrypted:1;
	atomic_t decrypt_pending;
	bool async_notify;
};
@@ -641,7 +641,8 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
void tls_device_offload_cleanup_rx(struct sock *sk);
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq);
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+			 struct sk_buff *skb, struct strp_msg *rxm);
#else
static inline void tls_device_init(void) {}
static inline void tls_device_cleanup(void) {}
@@ -664,7 +665,9 @@ static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
static inline void
tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}

-static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+static inline int
+tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+		     struct sk_buff *skb, struct strp_msg *rxm)
{
	return 0;
}
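
Squeezing async_capable and decrypted into 1-bit fields shrinks the RX
context: two full-width members (an int plus a bool, with padding)
collapse into a single byte. A userspace sketch of the layout change:

/* Userspace sketch, not kernel code: full-width flags vs. bitfields. */
#include <stdbool.h>
#include <stdio.h>

struct old_flags { int async_capable; bool decrypted; };  /* typically 8 bytes */
struct new_flags { unsigned char async_capable:1, decrypted:1; }; /* 1 byte */

int main(void)
{
	printf("old: %zu bytes, new: %zu bytes\n",
	       sizeof(struct old_flags), sizeof(struct new_flags));
	return 0;
}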
+2 −2
@@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
	 * account for the headroom.
	 */
	bytes_sg_total = start - offset + bytes;
-	if (!msg->sg.copy[i] && bytes_sg_total <= len)
+	if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
		goto out;

	/* At this point we need to linearize multiple scatterlist
@@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
	/* Place newly allocated data buffer */
	sk_mem_charge(msg->sk, len);
	msg->sg.size += len;
-	msg->sg.copy[new] = false;
+	__clear_bit(new, &msg->sg.copy);
	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
	if (rsge.length) {
		get_page(sg_page(&rsge));
+5 −7
@@ -431,7 +431,7 @@ static int tls_push_data(struct sock *sk,
	    ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
		return -ENOTSUPP;

-	if (sk->sk_err)
+	if (unlikely(sk->sk_err))
		return -sk->sk_err;

	flags |= MSG_SENDPAGE_DECRYPTED;
@@ -452,9 +452,8 @@ static int tls_push_data(struct sock *sk,
	max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
			      prot->prepend_size;
	do {
-		rc = tls_do_allocation(sk, ctx, pfrag,
-				       prot->prepend_size);
-		if (rc) {
+		rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size);
+		if (unlikely(rc)) {
			rc = sk_stream_wait_memory(sk, &timeo);
			if (!rc)
				continue;
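
For context, unlikely() in the kernel expands to
__builtin_expect(!!(x), 0) (include/linux/compiler.h), telling GCC to
treat the error return as the cold path and keep the hot path
fall-through. A standalone sketch of the annotation added above:

/* Userspace sketch of the branch hint used in tls_push_data(). */
#include <stdio.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)

static int push_data(int sk_err)
{
	if (unlikely(sk_err))	/* error path laid out out of line */
		return -sk_err;
	return 0;
}

int main(void)
{
	printf("%d %d\n", push_data(0), push_data(5));
	return 0;
}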
@@ -847,11 +846,10 @@ free_buf:
	return err;
}

-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+			 struct sk_buff *skb, struct strp_msg *rxm)
{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
-	struct strp_msg *rxm = strp_msg(skb);
	int is_decrypted = skb->decrypted;
	int is_encrypted = !is_decrypted;
	struct sk_buff *skb_iter;
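
With tls_ctx and rxm now passed in, tls_device_decrypted() no longer
re-derives them via tls_get_ctx(sk) and strp_msg(skb); the caller in
tls_sw.c (below) has already resolved both. A generic sketch of the
pattern, with hypothetical names:

/* Sketch with hypothetical names, not the kernel API: let the caller
 * pass pointers it already holds instead of repeating the lookup. */
struct state { int records; };
struct conn { struct state *st; };

static struct state *lookup_state(struct conn *c) { return c->st; }

static int handle_old(struct conn *c)	/* callee repeats the walk */
{
	struct state *st = lookup_state(c);
	return st->records;
}

static int handle_new(struct conn *c, struct state *st)	/* caller passes it */
{
	(void)c;
	return st->records;
}

int main(void)
{
	struct state st = { .records = 1 };
	struct conn c = { .st = &st };
	return handle_old(&c) - handle_new(&c, &st);	/* 0 */
}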
+7 −6
@@ -1495,7 +1495,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,

	if (!ctx->decrypted) {
		if (tls_ctx->rx_conf == TLS_HW) {
-			err = tls_device_decrypted(sk, skb);
+			err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
			if (err < 0)
				return err;
		}
@@ -1523,7 +1523,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
		rxm->offset += prot->prepend_size;
		rxm->full_len -= prot->overhead_size;
		tls_advance_record_sn(sk, prot, &tls_ctx->rx);
-		ctx->decrypted = true;
+		ctx->decrypted = 1;
		ctx->saved_data_ready(sk);
	} else {
		*zc = false;
@@ -1933,7 +1933,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
			tls_err_abort(sk, EBADMSG);
			goto splice_read_end;
		}
-		ctx->decrypted = true;
+		ctx->decrypted = 1;
	}
	rxm = strp_msg(skb);

@@ -2034,7 +2034,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

-	ctx->decrypted = false;
+	ctx->decrypted = 0;

	ctx->recv_pkt = skb;
	strp_pause(strp);
@@ -2391,10 +2391,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
		tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);

		if (crypto_info->version == TLS_1_3_VERSION)
-			sw_ctx_rx->async_capable = false;
+			sw_ctx_rx->async_capable = 0;
		else
			sw_ctx_rx->async_capable =
-				tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+				!!(tfm->__crt_alg->cra_flags &
+				   CRYPTO_ALG_ASYNC);

		/* Set up strparser */
		memset(&cb, 0, sizeof(cb));
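
A subtlety in the async_capable hunk above: the member is now a 1-bit
field and CRYPTO_ALG_ASYNC is a mask (0x00000080 in the kernel), so
assigning the masked value directly would keep only the low bit and
always store 0; the added !! collapses the mask to 0 or 1 first. A
userspace demonstration:

/* Userspace sketch: why !! is needed before storing a mask bit
 * into a 1-bit field. */
#include <stdio.h>

#define CRYPTO_ALG_ASYNC 0x00000080	/* actual kernel flag value */

struct ctx { unsigned char async_capable:1; };

int main(void)
{
	unsigned int cra_flags = CRYPTO_ALG_ASYNC;
	struct ctx a = {0}, b = {0};

	a.async_capable = cra_flags & CRYPTO_ALG_ASYNC;	/* truncates to 0 */
	b.async_capable = !!(cra_flags & CRYPTO_ALG_ASYNC);	/* stores 1 */
	printf("%u %u\n", a.async_capable, b.async_capable);
	return 0;
}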