Commit e604aad2 authored by Chuck Lever
Browse files

svcrdma: Use struct xdr_stream to decode ingress transport headers



The logic that checks incoming network headers has to be scrupulous.

De-duplicate: replace open-coded buffer overflow checks with the use
of xdr_stream helpers, which are used nearly everywhere else that XDR
decoding is done.

One minor change to the sanity checks: instead of checking the
length of individual segments, cap the length of the whole chunk
to be sure it can fit in the set of pages available in rq_pages.
This should be a better test of whether the server can handle the
chunks in each request.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 2426ddfd
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -58,7 +58,8 @@ enum {
enum {
	rpcrdma_fixed_maxsz	= 4,
	rpcrdma_segment_maxsz	= 4,
	rpcrdma_readchunk_maxsz	= 2 + rpcrdma_segment_maxsz,
	rpcrdma_readseg_maxsz	= 1 + rpcrdma_segment_maxsz,
	rpcrdma_readchunk_maxsz	= 1 + rpcrdma_readseg_maxsz,
};

/*
+1 −0
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt {
	struct ib_sge		rc_recv_sge;
	void			*rc_recv_buf;
	struct xdr_buf		rc_arg;
	struct xdr_stream	rc_stream;
	bool			rc_temp;
	u32			rc_byte_len;
	unsigned int		rc_page_count;
+4 −3
Original line number Diff line number Diff line
@@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
);

#define DEFINE_SEGMENT_EVENT(name)					\
		DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
		DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
				TP_PROTO(				\
					u32 handle,			\
					u32 length,			\
@@ -1477,8 +1477,9 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
				),					\
				TP_ARGS(handle, length, offset))

DEFINE_SEGMENT_EVENT(rseg);
DEFINE_SEGMENT_EVENT(wseg);
DEFINE_SEGMENT_EVENT(decode_wseg);
DEFINE_SEGMENT_EVENT(encode_rseg);
DEFINE_SEGMENT_EVENT(encode_wseg);

DECLARE_EVENT_CLASS(svcrdma_chunk_event,
	TP_PROTO(
+123 −83
Original line number Diff line number Diff line
@@ -358,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
	arg->len = ctxt->rc_byte_len;
}

/* This accommodates the largest possible Write chunk,
 * in one segment.
/* This accommodates the largest possible Write chunk.
 */
#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))

/* This accommodates the largest possible Position-Zero
 * Read chunk or Reply chunk, in one segment.
 * Read chunk or Reply chunk.
 */
#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))

/* Sanity check the Read list.
 *
@@ -374,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
 * - This implementation supports only one Read chunk.
 *
 * Sanity checks:
 * - Read list does not overflow buffer.
 * - Read list does not overflow Receive buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * The segment count is limited to how many segments can
@@ -382,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
 * buffer. That's about 40 Read segments for a 1KB inline
 * threshold.
 *
 * Returns pointer to the following Write list.
 * Return values:
 *       %true: Read list is valid. @rctxt's xdr_stream is updated
 *		to point to the first byte past the Read list.
 *      %false: Read list is corrupt. @rctxt's xdr_stream is left
 *		in an unknown state.
 */
static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
{
	u32 position;
	u32 position, len;
	bool first;
	__be32 *p;

	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
	if (!p)
		return false;

	len = 0;
	first = true;
	while (*p++ != xdr_zero) {
	while (*p != xdr_zero) {
		p = xdr_inline_decode(&rctxt->rc_stream,
				      rpcrdma_readseg_maxsz * sizeof(*p));
		if (!p)
			return false;

		if (first) {
			position = be32_to_cpup(p++);
			position = be32_to_cpup(p);
			first = false;
		} else if (be32_to_cpup(p++) != position) {
			return NULL;
		} else if (be32_to_cpup(p) != position) {
			return false;
		}
		p++;	/* handle */
		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
			return NULL;
		p += 2;	/* offset */
		p += 2;
		len += be32_to_cpup(p);

		if (p > end)
			return NULL;
		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
		if (!p)
			return false;
	}
	return p;
	return len <= MAX_BYTES_SPECIAL_CHUNK;
}

/* The segment count is limited to how many segments can
@@ -413,67 +426,93 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
 * buffer. That's about 60 Write segments for a 1KB inline
 * threshold.
 */
static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
				     u32 maxlen)
static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
{
	u32 i, segcount;
	u32 i, segcount, total;
	__be32 *p;

	segcount = be32_to_cpup(p++);
	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
	if (!p)
		return false;
	segcount = be32_to_cpup(p);

	total = 0;
	for (i = 0; i < segcount; i++) {
		p++;	/* handle */
		if (be32_to_cpup(p++) > maxlen)
			return NULL;
		p += 2;	/* offset */
		u32 handle, length;
		u64 offset;

		if (p > end)
			return NULL;
	}
		p = xdr_inline_decode(&rctxt->rc_stream,
				      rpcrdma_segment_maxsz * sizeof(*p));
		if (!p)
			return false;

		handle = be32_to_cpup(p++);
		length = be32_to_cpup(p++);
		xdr_decode_hyper(p, &offset);
		trace_svcrdma_decode_wseg(handle, length, offset);

	return p;
		total += length;
	}
	return total <= maxlen;
}

/* Sanity check the Write list.
 *
 * Implementation limits:
 * - This implementation supports only one Write chunk.
 * - This implementation currently supports only one Write chunk.
 *
 * Sanity checks:
 * - Write list does not overflow buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * Returns pointer to the following Reply chunk.
 * - Write list does not overflow Receive buffer.
 * - Chunk size limited by largest NFS data payload.
 *
 * Return values:
 *       %true: Write list is valid. @rctxt's xdr_stream is updated
 *		to point to the first byte past the Write list.
 *      %false: Write list is corrupt. @rctxt's xdr_stream is left
 *		in an unknown state.
 */
static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
{
	u32 chcount;
	u32 chcount = 0;
	__be32 *p;

	chcount = 0;
	while (*p++ != xdr_zero) {
		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
	if (!p)
			return NULL;
		if (chcount++ > 1)
			return NULL;
		return false;
	while (*p != xdr_zero) {
		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
			return false;
		++chcount;
		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
		if (!p)
			return false;
	}
	return p;
	return chcount < 2;
}

/* Sanity check the Reply chunk.
 *
 * Sanity checks:
 * - Reply chunk does not overflow buffer.
 * - Segment size limited by largest NFS data payload.
 *
 * Returns pointer to the following RPC header.
 * - Reply chunk does not overflow Receive buffer.
 * - Chunk size limited by largest NFS data payload.
 *
 * Return values:
 *       %true: Reply chunk is valid. @rctxt's xdr_stream is updated
 *		to point to the first byte past the Reply chunk.
 *      %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
 *		in an unknown state.
 */
static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
{
	if (*p++ != xdr_zero) {
		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
	__be32 *p;

	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
	if (!p)
			return NULL;
	}
	return p;
		return false;
	if (*p != xdr_zero)
		if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
			return false;
	return true;
}

/* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -538,60 +577,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
	ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
}

/* On entry, xdr->head[0].iov_base points to first byte in the
 * RPC-over-RDMA header.
/**
 * svc_rdma_xdr_decode_req - Decode the transport header
 * @rq_arg: xdr_buf containing ingress RPC/RDMA message
 * @rctxt: state of decoding
 *
 * On entry, xdr->head[0].iov_base points to first byte of the
 * RPC-over-RDMA transport header.
 *
 * On successful exit, head[0] points to first byte past the
 * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
 *
 * The length of the RPC-over-RDMA header is returned.
 *
 * Assumptions:
 * - The transport header is entirely contained in the head iovec.
 */
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
				   struct svc_rdma_recv_ctxt *rctxt)
{
	__be32 *p, *end, *rdma_argp;
	__be32 *p, *rdma_argp;
	unsigned int hdr_len;

	/* Verify that there's enough bytes for header + something */
	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
		goto out_short;

	rdma_argp = rq_arg->head[0].iov_base;
	if (*(rdma_argp + 1) != rpcrdma_version)
		goto out_version;
	xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);

	switch (*(rdma_argp + 3)) {
	p = xdr_inline_decode(&rctxt->rc_stream,
			      rpcrdma_fixed_maxsz * sizeof(*p));
	if (unlikely(!p))
		goto out_short;
	p++;
	if (*p != rpcrdma_version)
		goto out_version;
	p += 2;
	switch (*p) {
	case rdma_msg:
		break;
	case rdma_nomsg:
		break;

	case rdma_done:
		goto out_drop;

	case rdma_error:
		goto out_drop;

	default:
		goto out_proc;
	}

	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
	p = xdr_check_read_list(rdma_argp + 4, end);
	if (!p)
	if (!xdr_check_read_list(rctxt))
		goto out_inval;
	p = xdr_check_write_list(p, end);
	if (!p)
		goto out_inval;
	p = xdr_check_reply_chunk(p, end);
	if (!p)
	if (!xdr_check_write_list(rctxt))
		goto out_inval;
	if (p > end)
	if (!xdr_check_reply_chunk(rctxt))
		goto out_inval;

	rq_arg->head[0].iov_base = p;
	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
	rq_arg->head[0].iov_base = rctxt->rc_stream.p;
	hdr_len = xdr_stream_pos(&rctxt->rc_stream);
	rq_arg->head[0].iov_len -= hdr_len;
	rq_arg->len -= hdr_len;
	trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
@@ -786,7 +826,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
	rqstp->rq_next_page = rqstp->rq_respages;

	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
	if (ret < 0)
		goto out_err;
	if (ret == 0)