Commit 3c4a594b authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'update-sockmap-from-prog'

Lorenz Bauer says:

====================
We're currently building a control plane for our BPF socket dispatch
work. As part of that, we have a need to create a copy of an existing
sockhash, to allow us to change the keys. I previously proposed allowing
privileged userspace to look up sockets, which doesn't work due to
security concerns (see [1]).

In follow up discussions during BPF office hours we identified bpf_iter
as a possible solution: instead of accessing sockets from user space
we can iterate the source sockhash, and insert the values into a new
map. Enabling this requires two pieces: the ability to iterate
sockmap and sockhash, as well as being able to call map_update_elem
from BPF.

This patch set implements the latter: it's now possible to update
sockmap from BPF context. As a next step, we can implement bpf_iter
for sockmap.

===

I've done some more fixups, and audited the safe contexts more
thoroughly. As a result I'm removing CGROUP_SKB, SK_MSG and SK_SKB
for now.

Changes in v3:
- Use CHECK as much as possible (Yonghong)
- Reject ARG_PTR_TO_MAP_VALUE_OR_NULL for sockmap (Yonghong)
- Remove CGROUP_SKB, SK_MSG, SK_SKB from safe contexts
- Test that the verifier rejects update from unsafe context

Changes in v2:
- Fix warning in patch #2 (Jakub K)
- Renamed override_map_arg_type (John)
- Only allow updating sockmap from known safe contexts (John)
- Use __s64 for sockmap updates from user space (Yonghong)
- Various small test fixes around test macros and such (Yonghong)

Thank your for your reviews!

1: https://lore.kernel.org/bpf/20200310174711.7490-1-lmb@cloudflare.com/


====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents dca5612f bb23c0e1
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -1648,6 +1648,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
			 struct bpf_prog *old, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
@@ -1669,6 +1670,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr,
{
	return -EOPNOTSUPP;
}

static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
					   u64 flags)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_STREAM_PARSER */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+0 −17
Original line number Diff line number Diff line
@@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
					 struct sk_psock *psock,
					 struct proto *ops)
{
	/* Initialize saved callbacks and original proto only once, since this
	 * function may be called multiple times for a psock, e.g. when
	 * psock->progs.msg_parser is updated.
	 *
	 * Since we've not installed the new proto, psock is not yet in use and
	 * we can initialize it without synchronization.
	 */
	if (!psock->sk_proto) {
		struct proto *orig = READ_ONCE(sk->sk_prot);

		psock->saved_unhash = orig->unhash;
		psock->saved_close = orig->close;
		psock->saved_write_space = sk->sk_write_space;

		psock->sk_proto = orig;
	}

	/* Pairs with lockless read in sk_clone_lock() */
	WRITE_ONCE(sk->sk_prot, ops);
}
+3 −2
Original line number Diff line number Diff line
@@ -157,10 +157,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
	if (bpf_map_is_dev_bound(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		return sock_map_update_elem_sys(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, f.file, key, value,
						    flags);
+71 −2
Original line number Diff line number Diff line
@@ -3872,6 +3872,33 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
	return -EINVAL;
}

static int resolve_map_arg_type(struct bpf_verifier_env *env,
				 const struct bpf_call_arg_meta *meta,
				 enum bpf_arg_type *arg_type)
{
	if (!meta->map_ptr) {
		/* kernel subsystem misconfigured verifier */
		verbose(env, "invalid map_ptr to access map->type\n");
		return -EACCES;
	}

	switch (meta->map_ptr->map_type) {
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
			*arg_type = ARG_PTR_TO_SOCKET;
		} else {
			verbose(env, "invalid arg_type for sockmap/sockhash\n");
			return -EINVAL;
		}
		break;

	default:
		break;
	}
	return 0;
}

static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
			  struct bpf_call_arg_meta *meta,
			  const struct bpf_func_proto *fn)
@@ -3904,6 +3931,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
		return -EACCES;
	}

	if (arg_type == ARG_PTR_TO_MAP_VALUE ||
	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
		err = resolve_map_arg_type(env, meta, &arg_type);
		if (err)
			return err;
	}

	if (arg_type == ARG_PTR_TO_MAP_KEY ||
	    arg_type == ARG_PTR_TO_MAP_VALUE ||
	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
@@ -4143,6 +4178,38 @@ err_type:
	return -EACCES;
}

static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
{
	enum bpf_attach_type eatype = env->prog->expected_attach_type;
	enum bpf_prog_type type = env->prog->type;

	if (func_id != BPF_FUNC_map_update_elem)
		return false;

	/* It's not possible to get access to a locked struct sock in these
	 * contexts, so updating is safe.
	 */
	switch (type) {
	case BPF_PROG_TYPE_TRACING:
		if (eatype == BPF_TRACE_ITER)
			return true;
		break;
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		return true;
	default:
		break;
	}

	verbose(env, "cannot update sockmap in this context\n");
	return false;
}

static int check_map_func_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map, int func_id)
{
@@ -4214,7 +4281,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
		    func_id != BPF_FUNC_map_delete_elem &&
		    func_id != BPF_FUNC_msg_redirect_map &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem)
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKHASH:
@@ -4223,7 +4291,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
		    func_id != BPF_FUNC_map_delete_elem &&
		    func_id != BPF_FUNC_msg_redirect_hash &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem)
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+28 −6
Original line number Diff line number Diff line
@@ -494,14 +494,34 @@ end:

struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
	struct sk_psock *psock = kzalloc_node(sizeof(*psock),
					      GFP_ATOMIC | __GFP_NOWARN,
					      node);
	if (!psock)
		return NULL;
	struct sk_psock *psock;
	struct proto *prot;

	write_lock_bh(&sk->sk_callback_lock);

	if (inet_csk_has_ulp(sk)) {
		psock = ERR_PTR(-EINVAL);
		goto out;
	}

	if (sk->sk_user_data) {
		psock = ERR_PTR(-EBUSY);
		goto out;
	}

	psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
	if (!psock) {
		psock = ERR_PTR(-ENOMEM);
		goto out;
	}

	prot = READ_ONCE(sk->sk_prot);
	psock->sk = sk;
	psock->eval = __SK_NONE;
	psock->sk_proto = prot;
	psock->saved_unhash = prot->unhash;
	psock->saved_close = prot->close;
	psock->saved_write_space = sk->sk_write_space;

	INIT_LIST_HEAD(&psock->link);
	spin_lock_init(&psock->link_lock);
@@ -516,6 +536,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
	rcu_assign_sk_user_data_nocopy(sk, psock);
	sock_hold(sk);

out:
	write_unlock_bh(&sk->sk_callback_lock);
	return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);
Loading