Commit 74765da1 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_iter_tcp_udp'

Yonghong Song says:

====================
bpf iterator implments traversal of kernel data structures and these
data structures are passed to a bpf program for processing.
This gives great flexibility for users to examine kernel data
structure without using e.g. /proc/net which has limited and
fixed format.

Commit 138d0be3 ("net: bpf: Add netlink and ipv6_route bpf_iter targets")
implemented bpf iterators for netlink and ipv6_route.
This patch set intends to implement bpf iterators for tcp and udp.

Currently, /proc/net/tcp is used to print tcp4 stats and /proc/net/tcp6
is used to print tcp6 stats. /proc/net/udp[6] have similar usage model.
In contrast, only one tcp iterator is implemented and it is bpf program
resposibility to filter based on socket family. The same is for udp.
This will avoid another unnecessary traversal pass if users want
to check both tcp4 and tcp6.

Several helpers are also implemented in this patch
  bpf_skc_to_{tcp, tcp6, tcp_timewait, tcp_request, udp6}_sock
The argument for these helpers is not a fixed btf_id. For example,
  bpf_skc_to_tcp(struct sock_common *), or
  bpf_skc_to_tcp(struct sock *), or
  bpf_skc_to_tcp(struct inet_sock *), ...
are all valid. At runtime, the helper will check whether pointer cast
is legal or not. Please see Patch #5 for details.

Since btf_id's for both arguments and return value are known at
build time, the btf_id's are pre-computed once vmlinux btf becomes
valid. Jiri's "adding d_path helper" patch set
  https://lore.kernel.org/bpf/20200616100512.2168860-1-jolsa@kernel.org/T/


provides a way to pre-compute btf id during vmlinux build time.
This can be applied here as well. A followup patch can convert
to build time btf id computation after Jiri's patch landed.

Changelogs:
  v4 -> v5:
    - fix bpf_skc_to_udp6_sock helper as besides sk_protocol, sk_family,
      sk_type == SOCK_DGRAM is also needed to differentiate from
      SOCK_RAW (Eric)
  v3 -> v4:
    - fix bpf_skc_to_{tcp_timewait, tcp_request}_sock helper implementation
      as just checking sk->sk_state is not enough (Martin)
    - fix a few kernel test robot reported failures
    - move bpf_tracing_net.h from libbpf to selftests (Andrii)
    - remove __weak attribute from selftests CONFIG_HZ variables (Andrii)
  v2 -> v3:
    - change sock_cast*/SOCK_CAST* names to btf_sock* names for generality (Martin)
    - change gpl_license to false (Martin)
    - fix helper to cast to tcp timewait/request socket. (Martin)
  v1 -> v2:
    - guard init_sock_cast_types() defination properly with CONFIG_NET (Martin)
    - reuse the btf_ids, computed for new helper argument, for return
      values (Martin)
    - using BTF_TYPE_EMIT to express intent of btf type generation (Andrii)
    - abstract out common net macros into bpf_tracing_net.h (Andrii)
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents f9bcf968 cfcd75f9
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -265,6 +265,7 @@ enum bpf_return_type {
	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
};

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -287,6 +288,12 @@ struct bpf_func_proto {
		enum bpf_arg_type arg_type[5];
	};
	int *btf_id; /* BTF ids of arguments */
	bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is
						    * valid. Often used if more
						    * than one btf id is permitted
						    * for this argument.
						    */
	int *ret_btf_id; /* return value btf_id */
};

/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -1524,6 +1531,7 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)

struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
void init_btf_sock_ids(struct btf *btf);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
					union bpf_attr *attr)
@@ -1549,6 +1557,9 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
static inline void init_btf_sock_ids(struct btf *btf)
{
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_BPF_STREAM_PARSER)
@@ -1638,6 +1649,11 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
extern const struct bpf_func_proto bpf_ringbuf_query_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
	enum bpf_func_id func_id, const struct bpf_prog *prog);
+1 −0
Original line number Diff line number Diff line
@@ -1935,6 +1935,7 @@ struct tcp_iter_state {
	struct seq_net_private	p;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	struct tcp_seq_afinfo	*bpf_seq_afinfo;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
};
+1 −0
Original line number Diff line number Diff line
@@ -440,6 +440,7 @@ struct udp_seq_afinfo {
struct udp_iter_state {
	struct seq_net_private  p;
	int			bucket;
	struct udp_seq_afinfo	*bpf_seq_afinfo;
};

void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+36 −1
Original line number Diff line number Diff line
@@ -3255,6 +3255,36 @@ union bpf_attr {
 * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
 * 		is returned or the error code -EACCES in case the skb is not
 * 		subject to CHECKSUM_UNNECESSARY.
 *
 * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
 *	Description
 *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
 *	Description
 *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
 * 	Description
 *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
 * 	Description
 *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
 * 	Description
 *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3392,7 +3422,12 @@ union bpf_attr {
	FN(ringbuf_submit),		\
	FN(ringbuf_discard),		\
	FN(ringbuf_query),		\
	FN(csum_level),
	FN(csum_level),			\
	FN(skc_to_tcp6_sock),		\
	FN(skc_to_tcp_sock),		\
	FN(skc_to_tcp_timewait_sock),	\
	FN(skc_to_tcp_request_sock),	\
	FN(skc_to_udp6_sock),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+1 −0
Original line number Diff line number Diff line
@@ -3674,6 +3674,7 @@ struct btf *btf_parse_vmlinux(void)
		goto errout;

	bpf_struct_ops_init(btf, log);
	init_btf_sock_ids(btf);

	btf_verifier_env_free(env);
	refcount_set(&btf->refcnt, 1);
Loading