Commit 6316f783 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'support-global-data'

Daniel Borkmann says:

====================
This series is a major rework of previously submitted libbpf
patches [0] in order to add global data support for BPF. The
kernel has been extended to add proper infrastructure that allows
for full .bss/.data/.rodata sections on BPF loader side based
upon feedback from LPC discussions [1]. Latter support is then
also added into libbpf in this series which allows for more
natural C-like programming of BPF programs. For more information
on loader, please refer to 'bpf, libbpf: support global data/bss/
rodata sections' patch in this series.

Thanks a lot!

  v5 -> v6:
   - Removed synchronize_rcu() from map freeze (Jann)
   - Rest as-is
  v4 -> v5:
   - Removed index selection again for ldimm64 (Alexei)
   - Adapted related test cases and added new ones to test
     rejection of off != 0
  v3 -> v4:
   - Various fixes in BTF verification e.g. to disallow
     Var and DataSec to be an intermediate type during resolve (Martin)
   - More BTF test cases added
   - Few cleanups in key-less BTF commit (Martin)
   - Bump libbpf minor version from 2 to 3
   - Renamed and simplified read-only locking
   - Various minor improvements all over the place
  v2 -> v3:
   - Implement BTF support in kernel, libbpf, bpftool, add tests
   - Fix idx + off conversion (Andrii)
   - Document lower / higher bits for direct value access (Andrii)
   - Add tests with small value size (Andrii)
   - Add index selection into ldimm64 (Andrii)
   - Fix missing fdput() (Jann)
   - Reject invalid flags in BPF_F_*_PROG (Jakub)
   - Complete rework of libbpf support, includes:
    - Add objname to map name (Stanislav)
    - Make .rodata map full read-only after setup (Andrii)
    - Merge relocation handling into single one (Andrii)
    - Store global maps into obj->maps array (Andrii, Alexei)
    - Debug message when skipping section (Andrii)
    - Reject non-static global data till we have
      semantics for sharing them (Yonghong, Andrii, Alexei)
    - More test cases and completely reworked prog test (Alexei)
   - Fixes, cleanups, etc all over the set
   - Not yet addressed:
    - Make BTF mandatory for these maps (Alexei)
    -> Waiting till BTF support for these lands first
  v1 -> v2:
    - Instead of 32-bit static data, implement full global
      data support (Alexei)

  [0] https://patchwork.ozlabs.org/cover/1040290/
  [1] http://vger.kernel.org/lpc-bpf2018.html#session-3


====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents ff466b58 c861168b
Loading
Loading
Loading
Loading
+57 −0
Original line number Diff line number Diff line
@@ -82,6 +82,8 @@ sequentially and type id is assigned to each recognized type starting from id
    #define BTF_KIND_RESTRICT       11      /* Restrict     */
    #define BTF_KIND_FUNC           12      /* Function     */
    #define BTF_KIND_FUNC_PROTO     13      /* Function Proto       */
    #define BTF_KIND_VAR            14      /* Variable     */
    #define BTF_KIND_DATASEC        15      /* Section      */

Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -393,6 +395,61 @@ refers to parameter type.
If the function has variable arguments, the last parameter is encoded with
``name_off = 0`` and ``type = 0``.

2.2.14 BTF_KIND_VAR
~~~~~~~~~~~~~~~~~~~

``struct btf_type`` encoding requirement:
  * ``name_off``: offset to a valid C identifier
  * ``info.kind_flag``: 0
  * ``info.kind``: BTF_KIND_VAR
  * ``info.vlen``: 0
  * ``type``: the type of the variable

``btf_type`` is followed by a single ``struct btf_variable`` with the
following data::

    struct btf_var {
        __u32   linkage;
    };

``struct btf_var`` encoding:
  * ``linkage``: currently only static variable 0, or globally allocated
                 variable in ELF sections 1

Not all type of global variables are supported by LLVM at this point.
The following is currently available:

  * static variables with or without section attributes
  * global variables with section attributes

The latter is for future extraction of map key/value type id's from a
map definition.

2.2.15 BTF_KIND_DATASEC
~~~~~~~~~~~~~~~~~~~~~~~

``struct btf_type`` encoding requirement:
  * ``name_off``: offset to a valid name associated with a variable or
                  one of .data/.bss/.rodata
  * ``info.kind_flag``: 0
  * ``info.kind``: BTF_KIND_DATASEC
  * ``info.vlen``: # of variables
  * ``size``: total section size in bytes (0 at compilation time, patched
              to actual size by BPF loaders such as libbpf)

``btf_type`` is followed by ``info.vlen`` number of ``struct btf_var_secinfo``.::

    struct btf_var_secinfo {
        __u32   type;
        __u32   offset;
        __u32   size;
    };

``struct btf_var_secinfo`` encoding:
  * ``type``: the type of the BTF_KIND_VAR variable
  * ``offset``: the in-section offset of the variable
  * ``size``: the size of the variable in bytes

3. BTF Kernel API
*****************

+37 −1
Original line number Diff line number Diff line
@@ -57,6 +57,12 @@ struct bpf_map_ops {
			     const struct btf *btf,
			     const struct btf_type *key_type,
			     const struct btf_type *value_type);

	/* Direct value access helpers. */
	int (*map_direct_value_addr)(const struct bpf_map *map,
				     u64 *imm, u32 off);
	int (*map_direct_value_meta)(const struct bpf_map *map,
				     u64 imm, u32 *off);
};

struct bpf_map {
@@ -81,7 +87,8 @@ struct bpf_map {
	struct btf *btf;
	u32 pages;
	bool unpriv_array;
	/* 51 bytes hole */
	bool frozen; /* write-once */
	/* 48 bytes hole */

	/* The 3rd and 4th cacheline with misc members to avoid false sharing
	 * particularly with refcounting.
@@ -424,6 +431,35 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32

#define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
				 BPF_F_RDONLY_PROG |	\
				 BPF_F_WRONLY |		\
				 BPF_F_WRONLY_PROG)

#define BPF_MAP_CAN_READ	BIT(0)
#define BPF_MAP_CAN_WRITE	BIT(1)

static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
{
	u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);

	/* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
	 * not possible.
	 */
	if (access_flags & BPF_F_RDONLY_PROG)
		return BPF_MAP_CAN_READ;
	else if (access_flags & BPF_F_WRONLY_PROG)
		return BPF_MAP_CAN_WRITE;
	else
		return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
}

static inline bool bpf_map_flags_access_ok(u32 access_flags)
{
	return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
	       (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
}

struct bpf_event_entry {
	struct perf_event *event;
	struct file *perf_file;
+4 −0
Original line number Diff line number Diff line
@@ -224,6 +224,10 @@ struct bpf_insn_aux_data {
		unsigned long map_state;	/* pointer/poison value for maps */
		s32 call_imm;			/* saved imm field of call insn */
		u32 alu_limit;			/* limit for add/sub register with pointer */
		struct {
			u32 map_index;		/* index into used_maps[] */
			u32 map_off;		/* offset from value base address */
		};
	};
	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
	int sanitize_stack_off; /* stack slot to be cleared */
+1 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
			   const struct btf_member *m,
			   u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);

#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
+18 −2
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ enum bpf_cmd {
	BPF_BTF_GET_FD_BY_ID,
	BPF_TASK_FD_QUERY,
	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
	BPF_MAP_FREEZE,
};

enum bpf_map_type {
@@ -255,8 +256,19 @@ enum bpf_attach_type {
 */
#define BPF_F_ANY_ALIGNMENT	(1U << 1)

/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * two extensions:
 *
 * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
 * insn[0].imm:      map fd              map fd
 * insn[1].imm:      0                   offset into value
 * insn[0].off:      0                   0
 * insn[1].off:      0                   0
 * ldimm64 rewrite:  address of map      address of map[0]+offset
 * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
 */
#define BPF_PSEUDO_MAP_FD	1
#define BPF_PSEUDO_MAP_VALUE	2

/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
 * offset to another bpf function
@@ -283,7 +295,7 @@ enum bpf_attach_type {

#define BPF_OBJ_NAME_LEN 16U

/* Flags for accessing BPF object */
/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY		(1U << 3)
#define BPF_F_WRONLY		(1U << 4)

@@ -293,6 +305,10 @@ enum bpf_attach_type {
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED		(1U << 6)

/* Flags for accessing BPF object from program side. */
#define BPF_F_RDONLY_PROG	(1U << 7)
#define BPF_F_WRONLY_PROG	(1U << 8)

/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE	(1U << 0)

Loading