Commit 3dbb5b50 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_enable_stats'



Song Liu says:

====================
run_time_ns is a useful stats for BPF programs. However, it is gated by
sysctl kernel.bpf_stats_enabled. When multiple user space tools are
toggling kernl.bpf_stats_enabled at the same time, they may confuse each
other.

Solve this problem with a new BPF command BPF_ENABLE_STATS.

Changes v8 => v9:
  1. Clean up in selftest (Andrii).
  2. Not using static variable in test program (Andrii).

Changes v7 => v8:
  1. Change name BPF_STATS_RUNTIME_CNT => BPF_STATS_RUN_TIME (Alexei).
  2. Add CHECK_ATTR to bpf_enable_stats() (Alexei).
  3. Rebase (Andrii).
  4. Simplfy the selftest (Alexei).

Changes v6 => v7:
  1. Add test to verify run_cnt matches count measured by the program.

Changes v5 => v6:
  1. Simplify test program (Yonghong).
  2. Rebase (with some conflicts).

Changes v4 => v5:
  1. Use memset to zero bpf_attr in bpf_enable_stats() (Andrii).

Changes v3 => v4:
  1. Add libbpf support and selftest;
  2. Avoid cleaning trailing space.

Changes v2 => v3:
  1. Rename the command to BPF_ENABLE_STATS, and make it extendible.
  2. fix commit log;
  3. remove unnecessary headers.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents c3210222 31a9f7fe
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -987,6 +987,7 @@ _out: \

#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;

/*
 * Block execution of BPF programs attached to instrumentation (perf,
+11 −0
Original line number Diff line number Diff line
@@ -115,6 +115,7 @@ enum bpf_cmd {
	BPF_LINK_UPDATE,
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_ENABLE_STATS,
};

enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
 */
#define BPF_F_QUERY_EFFECTIVE	(1U << 0)

/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
	/* enabled run_time_ns and run_cnt */
	BPF_STATS_RUN_TIME = 0,
};

enum bpf_stack_build_id_status {
	/* user space need an empty entry to identify end of a trace */
	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
		__u32		old_prog_fd;
	} link_update;

	struct { /* struct used by BPF_ENABLE_STATS command */
		__u32		type;
	} enable_stats;

} __attribute__((aligned(8)));

/* The description below is an attempt at providing documentation to eBPF
+57 −0
Original line number Diff line number Diff line
@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
	return fd;
}

DEFINE_MUTEX(bpf_stats_enabled_mutex);

static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}

static const struct file_operations bpf_stats_fops = {
	.release = bpf_stats_release,
};

static int bpf_enable_runtime_stats(void)
{
	int fd;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
		mutex_unlock(&bpf_stats_enabled_mutex);
		return -EBUSY;
	}

	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
	if (fd >= 0)
		static_key_slow_inc(&bpf_stats_enabled_key.key);

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}

#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

static int bpf_enable_stats(union bpf_attr *attr)
{

	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		break;
	}
	return -EINVAL;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr;
@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
		err = bpf_obj_get_next_id(&attr, uattr,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	default:
		err = -EINVAL;
		break;
+35 −1
Original line number Diff line number Diff line
@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;

#endif /* CONFIG_SYSCTL */

#ifdef CONFIG_BPF_SYSCALL
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}
#endif

/*
 * /proc/sys support
 */
@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
		.data		= &bpf_stats_enabled_key.key,
		.maxlen		= sizeof(bpf_stats_enabled_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
		.proc_handler	= bpf_stats_handler,
	},
#endif
#if defined(CONFIG_TREE_RCU)
+11 −0
Original line number Diff line number Diff line
@@ -115,6 +115,7 @@ enum bpf_cmd {
	BPF_LINK_UPDATE,
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_ENABLE_STATS,
};

enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
 */
#define BPF_F_QUERY_EFFECTIVE	(1U << 0)

/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
	/* enabled run_time_ns and run_cnt */
	BPF_STATS_RUN_TIME = 0,
};

enum bpf_stack_build_id_status {
	/* user space need an empty entry to identify end of a trace */
	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
		__u32		old_prog_fd;
	} link_update;

	struct { /* struct used by BPF_ENABLE_STATS command */
		__u32		type;
	} enable_stats;

} __attribute__((aligned(8)));

/* The description below is an attempt at providing documentation to eBPF
Loading