Commit b9d37bbb authored by David S. Miller
Browse files


Alexei Starovoitov says:

====================
pull-request: bpf 2020-06-17

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 2 day(s) which contain
a total of 14 files changed, 158 insertions(+), 59 deletions(-).

The main changes are:

1) Important fix for bpf_probe_read_kernel_str() return value, from Andrii.

2) [gs]etsockopt fix for large optlen, from Stanislav.

3) devmap allocation fix, from Toke.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 69119673 8030e250
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -86,6 +86,20 @@ then the next program in the chain (A) will see those changes,
*not* the original input ``setsockopt`` arguments. The potentially
modified values will be then passed down to the kernel.

Large optval
============
When the ``optval`` is greater than the ``PAGE_SIZE``, the BPF program
can access only the first ``PAGE_SIZE`` bytes of that data. So it has two options:

* Set ``optlen`` to zero, which indicates that the kernel should
  use the original buffer from the userspace. Any modifications
  done by the BPF program to the ``optval`` are ignored.
* Set ``optlen`` to a value less than ``PAGE_SIZE``, which
  indicates that the kernel should use BPF's trimmed ``optval``.

When the BPF program returns with the ``optlen`` greater than
``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno.

Example
=======

+1 −1
Original line number Diff line number Diff line
@@ -3168,7 +3168,7 @@ union bpf_attr {
 *	Return
 *		The id is returned or 0 in case the id could not be retrieved.
 *
 * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
 * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
 * 	Description
 * 		Copy *size* bytes from *data* into a ring buffer *ringbuf*.
 * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+33 −20
Original line number Diff line number Diff line
@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,

static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
	if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
	if (unlikely(max_optlen < 0))
		return -EINVAL;

	if (unlikely(max_optlen > PAGE_SIZE)) {
		/* We don't expose optvals that are greater than PAGE_SIZE
		 * to the BPF program.
		 */
		max_optlen = PAGE_SIZE;
	}

	ctx->optval = kzalloc(max_optlen, GFP_USER);
	if (!ctx->optval)
		return -ENOMEM;

	ctx->optval_end = ctx->optval + max_optlen;

	return 0;
	return max_optlen;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
	 */
	max_optlen = max_t(int, 16, *optlen);

	ret = sockopt_alloc_buf(&ctx, max_optlen);
	if (ret)
		return ret;
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
	if (max_optlen < 0)
		return max_optlen;

	ctx.optlen = *optlen;

	if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
	if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
		ret = -EFAULT;
		goto out;
	}
@@ -1353,9 +1360,15 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
		/* export any potential modifications */
		*level = ctx.level;
		*optname = ctx.optname;

		/* optlen == 0 from BPF indicates that we should
		 * use original userspace data.
		 */
		if (ctx.optlen != 0) {
			*optlen = ctx.optlen;
			*kernel_optval = ctx.optval;
		}
	}

out:
	if (ret)
@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
		return retval;

	ret = sockopt_alloc_buf(&ctx, max_optlen);
	if (ret)
		return ret;

	ctx.optlen = max_optlen;

	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
	if (max_optlen < 0)
		return max_optlen;

	if (!retval) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
			goto out;
		}

		if (ctx.optlen > max_optlen)
			ctx.optlen = max_optlen;

		if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
		if (copy_from_user(ctx.optval, optval,
				   min(ctx.optlen, max_optlen)) != 0) {
			ret = -EFAULT;
			goto out;
		}
@@ -1436,11 +1447,13 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
		goto out;
	}

	if (ctx.optlen != 0) {
		if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
		    put_user(ctx.optlen, optlen)) {
			ret = -EFAULT;
			goto out;
		}
	}

	ret = ctx.retval;

+6 −4
Original line number Diff line number Diff line
@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries)
static struct hlist_head *dev_map_create_hash(unsigned int entries,
					      int numa_node)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
	hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
	if (hash != NULL)
		for (i = 0; i < entries; i++)
			INIT_HLIST_HEAD(&hash[i]);
@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
		return -EINVAL;

	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
							   dtab->map.numa_node);
		if (!dtab->dev_index_head)
			goto free_charge;

@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
			}
		}

		kfree(dtab->dev_index_head);
		bpf_map_area_free(dtab->dev_index_head);
	} else {
		for (i = 0; i < dtab->map.max_entries; i++) {
			struct bpf_dtab_netdev *dev;
+1 −1
Original line number Diff line number Diff line
@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
	if (unlikely(ret < 0))
		goto fail;

	return 0;
	return ret;
fail:
	memset(dst, 0, size);
	return ret;
Loading