Commit cc441a69 authored by Daniel Borkmann
Browse files

Merge branch 'bpf-verifier-scalability'



Alexei Starovoitov says:

====================
v1->v2:
- fixed typo in patch 1
- added a patch to convert kcalloc to kvcalloc
- added a patch to verbose 16-bit jump offset check
- added a test with 1m insns

This patch set is the first step to be able to accept large programs.
The verifier still suffers from its brute force algorithm and
large programs can easily hit 1M insn_processed limit.
A lot more work is necessary to be able to verify large programs.

v1:
Realize two key ideas to speed up verification by ~20 times
1. every 'branching' instruction records all verifier states.
   not all of them are useful for search pruning.
   add a simple heuristic to keep states that were successful in search pruning
   and remove those that were not
2. mark_reg_read walks parentage chain of registers to mark parents as LIVE_READ.
   Once the register is marked there is no need to remark it again in the future.
   Hence stop walking the chain once first LIVE_READ is seen.

1st optimization gives 10x speed up on large programs
and 2nd optimization reduces the cost of mark_reg_read from ~40% of cpu to <1%.
Combined they deliver a ~20x speedup on large programs.

Faster and bounded verification time allows increasing the insn_processed
limit to 1 million from 130k.
Worst case it takes 1/10 of a second to process that many instructions
and peak memory consumption is peak_states * sizeof(struct bpf_verifier_state)
which is around ~5Mbyte.

Increase insn_per_program limit for root to insn_processed limit.

Add verification stats and stress tests for verifier scalability.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents e83b9f55 8aa2d4b4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -421,6 +421,7 @@ struct bpf_array {
	};
};

#define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32

struct bpf_event_entry {
+23 −0
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ struct bpf_verifier_state {
struct bpf_verifier_state_list {
	struct bpf_verifier_state state;
	struct bpf_verifier_state_list *next;
	int miss_cnt, hit_cnt;
};

/* Possible states for alu_state member. */
@@ -248,6 +249,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
	return log->len_used >= log->len_total - 1;
}

#define BPF_LOG_LEVEL1	1
#define BPF_LOG_LEVEL2	2
#define BPF_LOG_STATS	4
#define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
#define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)

static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
	return log->level && log->ubuf && !bpf_verifier_log_full(log);
@@ -274,6 +281,7 @@ struct bpf_verifier_env {
	bool strict_alignment;		/* perform strict pointer alignment checks */
	struct bpf_verifier_state *cur_state; /* current verifier state */
	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
	struct bpf_verifier_state_list *free_list;
	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
	u32 used_map_cnt;		/* number of used maps */
	u32 id_gen;			/* used to generate unique reg IDs */
@@ -284,6 +292,21 @@ struct bpf_verifier_env {
	struct bpf_verifier_log log;
	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
	u32 subprog_cnt;
	/* number of instructions analyzed by the verifier */
	u32 insn_processed;
	/* total verification time */
	u64 verification_time;
	/* maximum number of verifier states kept in 'branching' instructions */
	u32 max_states_per_insn;
	/* total number of allocated verifier states */
	u32 total_states;
	/* some states are freed during program analysis.
	 * this is peak number of states. this number dominates kernel
	 * memory consumption during verification
	 */
	u32 peak_states;
	/* longest register parentage chain walked for liveness marking */
	u32 longest_mark_read_walk;
};

__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+6 −5
Original line number Diff line number Diff line
@@ -438,6 +438,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	const u32 cnt_max = S16_MAX;
	struct bpf_prog *prog_adj;
	int err;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
@@ -453,8 +454,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	 * we afterwards may not fail anymore.
	 */
	if (insn_adj_cnt > cnt_max &&
	    bpf_adj_branches(prog, off, off + 1, off + len, true))
		return NULL;
	    (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
		return ERR_PTR(err);

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
@@ -463,7 +464,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return NULL;
		return ERR_PTR(-ENOMEM);

	prog_adj->len = insn_adj_cnt;

@@ -1096,13 +1097,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (!tmp) {
		if (IS_ERR(tmp)) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return ERR_PTR(-ENOMEM);
			return tmp;
		}

		clone = tmp;
+2 −1
Original line number Diff line number Diff line
@@ -1557,7 +1557,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
	if (attr->insn_cnt == 0 ||
	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
		return -E2BIG;
	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
+116 −37
Original line number Diff line number Diff line
@@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem {
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024
#define BPF_COMPLEXITY_LIMIT_STATES	64

@@ -1092,7 +1091,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level > 1)
	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

@@ -1139,6 +1138,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
			 struct bpf_reg_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */
	int cnt = 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
@@ -1150,12 +1150,25 @@ static int mark_reg_read(struct bpf_verifier_env *env,
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		if (parent->live & REG_LIVE_READ)
			/* The parentage chain never changes and
			 * this parent was already marked as LIVE_READ.
			 * There is no need to keep walking the chain again and
			 * keep re-marking all parents as LIVE_READ.
			 * This case happens when the same register is read
			 * multiple times without writes into it in-between.
			 */
			break;
		/* ... then we depend on parent's value */
		parent->live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
		cnt++;
	}

	if (env->longest_mark_read_walk < cnt)
		env->longest_mark_read_walk = cnt;
	return 0;
}

@@ -1455,7 +1468,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, state);

	/* The minimum value is only important with signed
@@ -2938,7 +2951,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
	/* and go analyze first insn of the callee */
	*insn_idx = target_insn;

	if (env->log.level) {
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "caller:\n");
		print_verifier_state(env, caller);
		verbose(env, "callee:\n");
@@ -2978,7 +2991,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
		return err;

	*insn_idx = callee->callsite + 1;
	if (env->log.level) {
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, callee);
		verbose(env, "to caller at %d:\n", *insn_idx);
@@ -5001,7 +5014,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level)
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
	return 0;
}
@@ -5299,13 +5312,13 @@ static int check_cfg(struct bpf_verifier_env *env)
	int ret = 0;
	int i, t;

	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_state)
		return -ENOMEM;

	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_stack) {
		kfree(insn_state);
		kvfree(insn_state);
		return -ENOMEM;
	}

@@ -5403,8 +5416,8 @@ check_state:
	ret = 0; /* cfg looks good */

err_free:
	kfree(insn_state);
	kfree(insn_stack);
	kvfree(insn_state);
	kvfree(insn_stack);
	return ret;
}

@@ -6147,11 +6160,13 @@ static int propagate_liveness(struct bpf_verifier_env *env,
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state_list *sl, **pprev;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	int i, j, err, states_cnt = 0;

	sl = env->explored_states[insn_idx];
	pprev = &env->explored_states[insn_idx];
	sl = *pprev;

	if (!sl)
		/* this 'insn_idx' instruction wasn't marked, so we will not
		 * be doing state search here
@@ -6162,6 +6177,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)

	while (sl != STATE_LIST_MARK) {
		if (states_equal(env, &sl->state, cur)) {
			sl->hit_cnt++;
			/* reached equivalent register/stack state,
			 * prune the search.
			 * Registers read by the continuation are read by us.
@@ -6177,9 +6193,39 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
				return err;
			return 1;
		}
		sl = sl->next;
		states_cnt++;
		sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 */
		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			*pprev = sl->next;
			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
				free_verifier_state(&sl->state, false);
				kfree(sl);
				env->peak_states--;
			} else {
				/* cannot free this state, since parentage chain may
				 * walk it later. Add it for free_list instead to
				 * be freed at the end of verification
				 */
				sl->next = env->free_list;
				env->free_list = sl;
			}
			sl = *pprev;
			continue;
		}
		pprev = &sl->next;
		sl = *pprev;
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;
@@ -6194,6 +6240,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->peak_states++;

	/* add new state to the head of linked list */
	new = &new_sl->state;
@@ -6278,8 +6326,7 @@ static int do_check(struct bpf_verifier_env *env)
	struct bpf_verifier_state *state;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_reg_state *regs;
	int insn_cnt = env->prog->len, i;
	int insn_processed = 0;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;

	env->prev_linfo = NULL;
@@ -6314,10 +6361,10 @@ static int do_check(struct bpf_verifier_env *env)
		insn = &insns[env->insn_idx];
		class = BPF_CLASS(insn->code);

		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				insn_processed);
				env->insn_processed);
			return -E2BIG;
		}

@@ -6326,7 +6373,7 @@ static int do_check(struct bpf_verifier_env *env)
			return err;
		if (err == 1) {
			/* found equivalent state, can prune the search */
			if (env->log.level) {
			if (env->log.level & BPF_LOG_LEVEL) {
				if (do_print_state)
					verbose(env, "\nfrom %d to %d%s: safe\n",
						env->prev_insn_idx, env->insn_idx,
@@ -6344,8 +6391,9 @@ static int do_check(struct bpf_verifier_env *env)
		if (need_resched())
			cond_resched();

		if (env->log.level > 1 || (env->log.level && do_print_state)) {
			if (env->log.level > 1)
		if (env->log.level & BPF_LOG_LEVEL2 ||
		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "%d:", env->insn_idx);
			else
				verbose(env, "\nfrom %d to %d%s:",
@@ -6356,7 +6404,7 @@ static int do_check(struct bpf_verifier_env *env)
			do_print_state = false;
		}

		if (env->log.level) {
		if (env->log.level & BPF_LOG_LEVEL) {
			const struct bpf_insn_cbs cbs = {
				.cb_print	= verbose,
				.private_data	= env,
@@ -6621,16 +6669,6 @@ process_bpf_exit:
		env->insn_idx++;
	}

	verbose(env, "processed %d insns (limit %d), stack depth ",
		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
	for (i = 0; i < env->subprog_cnt; i++) {
		u32 depth = env->subprog_info[i].stack_depth;

		verbose(env, "%d", depth);
		if (i + 1 < env->subprog_cnt)
			verbose(env, "+");
	}
	verbose(env, "\n");
	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
	return 0;
}
@@ -6893,8 +6931,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
	struct bpf_prog *new_prog;

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (!new_prog)
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		return NULL;
	}
	if (adjust_insn_aux_data(env, new_prog->len, off, len))
		return NULL;
	adjust_subprog_starts(env, off, len);
@@ -7836,6 +7879,14 @@ static void free_states(struct bpf_verifier_env *env)
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	sl = env->free_list;
	while (sl) {
		sln = sl->next;
		free_verifier_state(&sl->state, false);
		kfree(sl);
		sl = sln;
	}

	if (!env->explored_states)
		return;

@@ -7851,12 +7902,37 @@ static void free_states(struct bpf_verifier_env *env)
			}
	}

	kfree(env->explored_states);
	kvfree(env->explored_states);
}

/* Emit verifier statistics into the log.
 * The detailed lines (verification time, per-subprog stack depth) are
 * gated on BPF_LOG_STATS; the summary line is printed unconditionally.
 */
static void print_verification_stats(struct bpf_verifier_env *env)
{
	int i;

	if (env->log.level & BPF_LOG_STATS) {
		/* verification_time is accumulated in nanoseconds;
		 * report it in microseconds.
		 */
		verbose(env, "verification time %lld usec\n",
			div_u64(env->verification_time, 1000));
		verbose(env, "stack depth ");
		for (i = 0; i < env->subprog_cnt; i++) {
			u32 depth = env->subprog_info[i].stack_depth;

			verbose(env, "%d", depth);
			/* join per-subprog depths with '+' separators */
			if (i + 1 < env->subprog_cnt)
				verbose(env, "+");
		}
		verbose(env, "\n");
	}
	/* one-line summary of work done and peak state usage */
	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
		"total_states %d peak_states %d mark_read %d\n",
		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
		env->max_states_per_insn, env->total_states,
		env->peak_states, env->longest_mark_read_walk);
}

int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
	      union bpf_attr __user *uattr)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	struct bpf_verifier_log *log;
	int i, len, ret = -EINVAL;
@@ -7898,8 +7974,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,

		ret = -EINVAL;
		/* log attributes have to be sane */
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
		    !log->level || !log->ubuf)
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
			goto err_unlock;
	}

@@ -7922,7 +7998,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
			goto skip_full_check;
	}

	env->explored_states = kcalloc(env->prog->len,
	env->explored_states = kvcalloc(env->prog->len,
				       sizeof(struct bpf_verifier_state_list *),
				       GFP_USER);
	ret = -ENOMEM;
@@ -7980,6 +8056,9 @@ skip_full_check:
	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
Loading