Commit cc441a69 authored by Daniel Borkmann
Browse files

Merge branch 'bpf-verifier-scalability'



Alexei Starovoitov says:

====================
v1->v2:
- fixed typo in patch 1
- added a patch to convert kcalloc to kvcalloc
- added a patch to verbose 16-bit jump offset check
- added a test with 1m insns

This patch set is the first step to be able to accept large programs.
The verifier still suffers from its brute force algorithm and
large programs can easily hit 1M insn_processed limit.
A lot more work is necessary to be able to verify large programs.

v1:
Realize two key ideas to speed up verification by ~20 times
1. every 'branching' instruction records all verifier states.
   not all of them are useful for search pruning.
   add a simple heuristic to keep states that were successful in search pruning
   and remove those that were not
2. mark_reg_read walks parentage chain of registers to mark parents as LIVE_READ.
   Once the register is marked there is no need to remark it again in the future.
   Hence stop walking the chain once first LIVE_READ is seen.

1st optimization gives 10x speed up on large programs
and 2nd optimization reduces the cost of mark_reg_read from ~40% of cpu to <1%.
Combined they deliver a ~20x speedup on large programs.

Faster and bounded verification time allows increasing the insn_processed
limit to 1 million from 130k.
Worst case it takes 1/10 of a second to process that many instructions
and peak memory consumption is peak_states * sizeof(struct bpf_verifier_state)
which is around ~5Mbyte.

Increase insn_per_program limit for root to insn_processed limit.

Add verification stats and stress tests for verifier scalability.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents e83b9f55 8aa2d4b4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -421,6 +421,7 @@ struct bpf_array {
	};
};

#define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32

struct bpf_event_entry {
+23 −0
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ struct bpf_verifier_state {
struct bpf_verifier_state_list {
	struct bpf_verifier_state state;
	struct bpf_verifier_state_list *next;
	int miss_cnt, hit_cnt;
};

/* Possible states for alu_state member. */
@@ -248,6 +249,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
	return log->len_used >= log->len_total - 1;
}

#define BPF_LOG_LEVEL1	1
#define BPF_LOG_LEVEL2	2
#define BPF_LOG_STATS	4
#define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
#define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)

static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
	return log->level && log->ubuf && !bpf_verifier_log_full(log);
@@ -274,6 +281,7 @@ struct bpf_verifier_env {
	bool strict_alignment;		/* perform strict pointer alignment checks */
	struct bpf_verifier_state *cur_state; /* current verifier state */
	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
	struct bpf_verifier_state_list *free_list;
	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
	u32 used_map_cnt;		/* number of used maps */
	u32 id_gen;			/* used to generate unique reg IDs */
@@ -284,6 +292,21 @@ struct bpf_verifier_env {
	struct bpf_verifier_log log;
	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
	u32 subprog_cnt;
	/* number of instructions analyzed by the verifier */
	u32 insn_processed;
	/* total verification time */
	u64 verification_time;
	/* maximum number of verifier states kept in 'branching' instructions */
	u32 max_states_per_insn;
	/* total number of allocated verifier states */
	u32 total_states;
	/* some states are freed during program analysis.
	 * this is peak number of states. this number dominates kernel
	 * memory consumption during verification
	 */
	u32 peak_states;
	/* longest register parentage chain walked for liveness marking */
	u32 longest_mark_read_walk;
};

__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+6 −5
Original line number Diff line number Diff line
@@ -438,6 +438,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	const u32 cnt_max = S16_MAX;
	struct bpf_prog *prog_adj;
	int err;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
@@ -453,8 +454,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	 * we afterwards may not fail anymore.
	 */
	if (insn_adj_cnt > cnt_max &&
	    bpf_adj_branches(prog, off, off + 1, off + len, true))
		return NULL;
	    (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
		return ERR_PTR(err);

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
@@ -463,7 +464,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return NULL;
		return ERR_PTR(-ENOMEM);

	prog_adj->len = insn_adj_cnt;

@@ -1096,13 +1097,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (!tmp) {
		if (IS_ERR(tmp)) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return ERR_PTR(-ENOMEM);
			return tmp;
		}

		clone = tmp;
+2 −1
Original line number Diff line number Diff line
@@ -1557,7 +1557,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
	if (attr->insn_cnt == 0 ||
	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
		return -E2BIG;
	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
+116 −37
Original line number Diff line number Diff line
@@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem {
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024
#define BPF_COMPLEXITY_LIMIT_STATES	64

@@ -1092,7 +1091,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level > 1)
	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

@@ -1139,6 +1138,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
			 struct bpf_reg_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */
	int cnt = 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
@@ -1150,12 +1150,25 @@ static int mark_reg_read(struct bpf_verifier_env *env,
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		if (parent->live & REG_LIVE_READ)
			/* The parentage chain never changes and
			 * this parent was already marked as LIVE_READ.
			 * There is no need to keep walking the chain again and
			 * keep re-marking all parents as LIVE_READ.
			 * This case happens when the same register is read
			 * multiple times without writes into it in-between.
			 */
			break;
		/* ... then we depend on parent's value */
		parent->live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
		cnt++;
	}

	if (env->longest_mark_read_walk < cnt)
		env->longest_mark_read_walk = cnt;
	return 0;
}

@@ -1455,7 +1468,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, state);

	/* The minimum value is only important with signed
@@ -2938,7 +2951,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
	/* and go analyze first insn of the callee */
	*insn_idx = target_insn;

	if (env->log.level) {
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "caller:\n");
		print_verifier_state(env, caller);
		verbose(env, "callee:\n");
@@ -2978,7 +2991,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
		return err;

	*insn_idx = callee->callsite + 1;
	if (env->log.level) {
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, callee);
		verbose(env, "to caller at %d:\n", *insn_idx);
@@ -5001,7 +5014,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level)
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
	return 0;
}
@@ -5299,13 +5312,13 @@ static int check_cfg(struct bpf_verifier_env *env)
	int ret = 0;
	int i, t;

	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_state)
		return -ENOMEM;

	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_stack) {
		kfree(insn_state);
		kvfree(insn_state);
		return -ENOMEM;
	}

@@ -5403,8 +5416,8 @@ check_state:
	ret = 0; /* cfg looks good */

err_free:
	kfree(insn_state);
	kfree(insn_stack);
	kvfree(insn_state);
	kvfree(insn_stack);
	return ret;
}

@@ -6147,11 +6160,13 @@ static int propagate_liveness(struct bpf_verifier_env *env,
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state_list *sl, **pprev;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	int i, j, err, states_cnt = 0;

	sl = env->explored_states[insn_idx];
	pprev = &env->explored_states[insn_idx];
	sl = *pprev;

	if (!sl)
		/* this 'insn_idx' instruction wasn't marked, so we will not
		 * be doing state search here
@@ -6162,6 +6177,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)

	while (sl != STATE_LIST_MARK) {
		if (states_equal(env, &sl->state, cur)) {
			sl->hit_cnt++;
			/* reached equivalent register/stack state,
			 * prune the search.
			 * Registers read by the continuation are read by us.
@@ -6177,9 +6193,39 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
				return err;
			return 1;
		}
		sl = sl->next;
		states_cnt++;
		sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 */
		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			*pprev = sl->next;
			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
				free_verifier_state(&sl->state, false);
				kfree(sl);
				env->peak_states--;
			} else {
				/* cannot free this state, since parentage chain may
				 * walk it later. Add it for free_list instead to
				 * be freed at the end of verification
				 */
				sl->next = env->free_list;
				env->free_list = sl;
			}
			sl = *pprev;
			continue;
		}
		pprev = &sl->next;
		sl = *pprev;
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;
@@ -6194,6 +6240,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->peak_states++;

	/* add new state to the head of linked list */
	new = &new_sl->state;
@@ -6278,8 +6326,7 @@ static int do_check(struct bpf_verifier_env *env)
	struct bpf_verifier_state *state;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_reg_state *regs;
	int insn_cnt = env->prog->len, i;
	int insn_processed = 0;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;

	env->prev_linfo = NULL;
@@ -6314,10 +6361,10 @@ static int do_check(struct bpf_verifier_env *env)
		insn = &insns[env->insn_idx];
		class = BPF_CLASS(insn->code);

		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				insn_processed);
				env->insn_processed);
			return -E2BIG;
		}

@@ -6326,7 +6373,7 @@ static int do_check(struct bpf_verifier_env *env)
			return err;
		if (err == 1) {
			/* found equivalent state, can prune the search */
			if (env->log.level) {
			if (env->log.level & BPF_LOG_LEVEL) {
				if (do_print_state)
					verbose(env, "\nfrom %d to %d%s: safe\n",
						env->prev_insn_idx, env->insn_idx,
@@ -6344,8 +6391,9 @@ static int do_check(struct bpf_verifier_env *env)
		if (need_resched())
			cond_resched();

		if (env->log.level > 1 || (env->log.level && do_print_state)) {
			if (env->log.level > 1)
		if (env->log.level & BPF_LOG_LEVEL2 ||
		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "%d:", env->insn_idx);
			else
				verbose(env, "\nfrom %d to %d%s:",
@@ -6356,7 +6404,7 @@ static int do_check(struct bpf_verifier_env *env)
			do_print_state = false;
		}

		if (env->log.level) {
		if (env->log.level & BPF_LOG_LEVEL) {
			const struct bpf_insn_cbs cbs = {
				.cb_print	= verbose,
				.private_data	= env,
@@ -6621,16 +6669,6 @@ process_bpf_exit:
		env->insn_idx++;
	}

	verbose(env, "processed %d insns (limit %d), stack depth ",
		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
	for (i = 0; i < env->subprog_cnt; i++) {
		u32 depth = env->subprog_info[i].stack_depth;

		verbose(env, "%d", depth);
		if (i + 1 < env->subprog_cnt)
			verbose(env, "+");
	}
	verbose(env, "\n");
	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
	return 0;
}
@@ -6893,8 +6931,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
	struct bpf_prog *new_prog;

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (!new_prog)
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		return NULL;
	}
	if (adjust_insn_aux_data(env, new_prog->len, off, len))
		return NULL;
	adjust_subprog_starts(env, off, len);
@@ -7836,6 +7879,14 @@ static void free_states(struct bpf_verifier_env *env)
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	sl = env->free_list;
	while (sl) {
		sln = sl->next;
		free_verifier_state(&sl->state, false);
		kfree(sl);
		sl = sln;
	}

	if (!env->explored_states)
		return;

@@ -7851,12 +7902,37 @@ static void free_states(struct bpf_verifier_env *env)
			}
	}

	kfree(env->explored_states);
	kvfree(env->explored_states);
}

/* Emit verifier statistics into the log.
 * The detailed lines (verification time, per-subprog stack depth) are
 * gated on BPF_LOG_STATS; the summary line is printed unconditionally.
 */
static void print_verification_stats(struct bpf_verifier_env *env)
{
	int i;

	if (env->log.level & BPF_LOG_STATS) {
		/* verification_time is accumulated in nanoseconds;
		 * report it in microseconds.
		 */
		verbose(env, "verification time %lld usec\n",
			div_u64(env->verification_time, 1000));
		verbose(env, "stack depth ");
		for (i = 0; i < env->subprog_cnt; i++) {
			u32 depth = env->subprog_info[i].stack_depth;

			verbose(env, "%d", depth);
			/* join per-subprog depths with '+' separators */
			if (i + 1 < env->subprog_cnt)
				verbose(env, "+");
		}
		verbose(env, "\n");
	}
	/* one-line summary of work done and peak state usage */
	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
		"total_states %d peak_states %d mark_read %d\n",
		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
		env->max_states_per_insn, env->total_states,
		env->peak_states, env->longest_mark_read_walk);
}

int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
	      union bpf_attr __user *uattr)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	struct bpf_verifier_log *log;
	int i, len, ret = -EINVAL;
@@ -7898,8 +7974,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,

		ret = -EINVAL;
		/* log attributes have to be sane */
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
		    !log->level || !log->ubuf)
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
			goto err_unlock;
	}

@@ -7922,7 +7998,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
			goto skip_full_check;
	}

	env->explored_states = kcalloc(env->prog->len,
	env->explored_states = kvcalloc(env->prog->len,
				       sizeof(struct bpf_verifier_state_list *),
				       GFP_USER);
	ret = -ENOMEM;
@@ -7980,6 +8056,9 @@ skip_full_check:
	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
Loading