Commit c48b0722 authored by Linus Torvalds
Browse files

Merge tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more perf updates from Thomas Gleixner:
 "Perf updates all over the place:

  core:

   - Support for cgroup tracking in samples to allow cgroup based
     analysis

  tools:

   - Support for cgroup analysis

   - Commandline option and hotkey for perf top to change the sort order

   - A set of fixes all over the place

   - Various build system related improvements

   - Updates of the X86 pmu event JSON data

   - Documentation updates"

* tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
  perf python: Fix clang detection to strip out options passed in $CC
  perf tools: Support Python 3.8+ in Makefile
  perf script: Fix invalid read of directory entry after closedir()
  perf script report: Fix SEGFAULT when using DWARF mode
  perf script: add -S/--symbols documentation
  perf pmu-events x86: Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric
  perf events parser: Add missing Intel CPU events to parser
  perf script: Allow --symbol to accept hexadecimal addresses
  perf report/top TUI: Fix title line formatting
  perf top: Support hotkey to change sort order
  perf top: Support --group-sort-idx to change the sort order
  perf symbols: Fix arm64 gap between kernel start and module end
  perf build-test: Honour JOBS to override detection of number of cores
  perf script: Add --show-cgroup-events option
  perf top: Add --all-cgroups option
  perf record: Add --all-cgroups option
  perf record: Support synthesizing cgroup events
  perf report: Add 'cgroup' sort key
  perf cgroup: Maintain cgroup hierarchy
  perf tools: Basic support for CGROUP event
  ...
parents d5ca3273 7dc41b9b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1020,6 +1020,7 @@ struct perf_sample_data {
	u64				stack_user_size;

	u64				phys_addr;
	u64				cgroup;
} ____cacheline_aligned;

/* default value for data source */
+14 −2
Original line number Diff line number Diff line
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
	PERF_SAMPLE_REGS_INTR			= 1U << 18,
	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
	PERF_SAMPLE_AUX				= 1U << 20,
	PERF_SAMPLE_CGROUP			= 1U << 21,

	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */

	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
};
@@ -381,7 +382,8 @@ struct perf_event_attr {
				ksymbol        :  1, /* include ksymbol events */
				bpf_event      :  1, /* include bpf events */
				aux_output     :  1, /* generate AUX records instead of events */
				__reserved_1   : 32;
				cgroup         :  1, /* include cgroup events */
				__reserved_1   : 31;

	union {
		__u32		wakeup_events;	  /* wakeup every n events */
@@ -1012,6 +1014,16 @@ enum perf_event_type {
	 */
	PERF_RECORD_BPF_EVENT			= 18,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				id;
	 *	char				path[];
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_CGROUP			= 19,

	PERF_RECORD_MAX,			/* non-ABI */
};

+2 −1
Original line number Diff line number Diff line
@@ -1029,7 +1029,8 @@ config CGROUP_PERF
	help
	  This option extends the perf per-cpu mode to restrict monitoring
	  to threads which belong to the cgroup specified and run on the
	  designated cpu.
	  designated cpu.  This can also be used to include the cgroup ID in
	  samples, so that performance events can be monitored per cgroup.

	  Say N if unsure.

+133 −0
Original line number Diff line number Diff line
@@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;

static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -1861,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		size += sizeof(data->phys_addr);

	if (sample_type & PERF_SAMPLE_CGROUP)
		size += sizeof(data->cgroup);

	event->header_size = size;
}

@@ -4608,6 +4612,8 @@ static void unaccount_event(struct perf_event *event)
		atomic_dec(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_dec(&nr_namespaces_events);
	if (event->attr.cgroup)
		atomic_dec(&nr_cgroup_events);
	if (event->attr.task)
		atomic_dec(&nr_task_events);
	if (event->attr.freq)
@@ -6864,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		perf_output_put(handle, data->phys_addr);

	if (sample_type & PERF_SAMPLE_CGROUP)
		perf_output_put(handle, data->cgroup);

	if (sample_type & PERF_SAMPLE_AUX) {
		perf_output_put(handle, data->aux_size);

@@ -7063,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		data->phys_addr = perf_virt_to_phys(data->addr);

#ifdef CONFIG_CGROUP_PERF
	if (sample_type & PERF_SAMPLE_CGROUP) {
		struct cgroup *cgrp;

		/* protected by RCU */
		cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
		data->cgroup = cgroup_id(cgrp);
	}
#endif

	if (sample_type & PERF_SAMPLE_AUX) {
		u64 size;

@@ -7735,6 +7754,105 @@ void perf_event_namespaces(struct task_struct *task)
			NULL);
}

/*
 * cgroup tracking
 */
#ifdef CONFIG_CGROUP_PERF

/*
 * Scratch state used to build and emit one PERF_RECORD_CGROUP side-band
 * event; shared (via the iterator's data pointer) across all events the
 * record is delivered to.
 */
struct perf_cgroup_event {
	char				*path;		/* NUL-padded cgroup path */
	int				path_size;	/* padded path length, a multiple of 8 */
	struct {
		struct perf_event_header	header;
		u64				id;	/* cgroup_id() of the cgroup */
		char				path[];	/* variable-length tail, copied separately */
	} event_id;	/* fixed on-the-wire prefix of the record */
};

static int perf_event_cgroup_match(struct perf_event *event)
{
	return event->attr.cgroup;
}

/*
 * perf_iterate_sb() callback: write one PERF_RECORD_CGROUP record
 * (fixed header + id, then the padded path, then the sample_id trailer)
 * into @event's ring buffer, if the event opted in via attr.cgroup.
 */
static void perf_event_cgroup_output(struct perf_event *event, void *data)
{
	struct perf_cgroup_event *cgroup_event = data;
	struct perf_output_handle handle;
	struct perf_sample_data sample;
	/* header.size is grown per-event below; remember it so it can be restored. */
	u16 header_size = cgroup_event->event_id.header.size;
	int ret;

	if (!perf_event_cgroup_match(event))
		return;

	/* Adds this event's sample_id layout to header.size. */
	perf_event_header__init_id(&cgroup_event->event_id.header,
				   &sample, event);
	ret = perf_output_begin(&handle, event,
				cgroup_event->event_id.header.size);
	if (ret)
		goto out;

	perf_output_put(&handle, cgroup_event->event_id);
	__output_copy(&handle, cgroup_event->path, cgroup_event->path_size);

	perf_event__output_id_sample(event, &handle, &sample);

	perf_output_end(&handle);
out:
	/* Undo the per-event size adjustment; @data is shared across events. */
	cgroup_event->event_id.header.size = header_size;
}

/*
 * Emit a PERF_RECORD_CGROUP side-band event describing @cgrp (its 64-bit
 * id plus its path), delivered to every event that set attr.cgroup.
 */
static void perf_event_cgroup(struct cgroup *cgrp)
{
	struct perf_cgroup_event cgroup_event;
	char path_enomem[16] = "//enomem";
	char *pathname;
	size_t size;

	/* Fast path: no event anywhere asked for cgroup records. */
	if (!atomic_read(&nr_cgroup_events))
		return;

	cgroup_event = (struct perf_cgroup_event){
		.event_id  = {
			.header = {
				.type = PERF_RECORD_CGROUP,
				.misc = 0,
				.size = sizeof(cgroup_event.event_id),
			},
			.id = cgroup_id(cgrp),
		},
	};

	pathname = kmalloc(PATH_MAX, GFP_KERNEL);
	if (pathname == NULL) {
		/*
		 * Fall back to a recognizable marker path; "//enomem" plus
		 * its NUL pads to exactly 16 bytes, so the alignment loop
		 * below stays within path_enomem[].
		 */
		cgroup_event.path = path_enomem;
	} else {
		/* Reserve sizeof(u64) tail room so the zero-padding below cannot overrun. */
		cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
		cgroup_event.path = pathname;
	}

	/*
	 * Since our buffer works in 8 byte units we need to align our string
	 * size to a multiple of 8. However, we must guarantee the tail end is
	 * zero'd out to avoid leaking random bits to userspace.
	 */
	size = strlen(cgroup_event.path) + 1;
	while (!IS_ALIGNED(size, sizeof(u64)))
		cgroup_event.path[size++] = '\0';

	/* The record covers the fixed prefix plus the padded path. */
	cgroup_event.event_id.header.size += size;
	cgroup_event.path_size = size;

	/* Broadcast to all matching events in all contexts. */
	perf_iterate_sb(perf_event_cgroup_output,
			&cgroup_event,
			NULL);

	kfree(pathname);
}

#endif

/*
 * mmap tracking
 */
@@ -10778,6 +10896,8 @@ static void account_event(struct perf_event *event)
		atomic_inc(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_inc(&nr_namespaces_events);
	if (event->attr.cgroup)
		atomic_inc(&nr_cgroup_events);
	if (event->attr.task)
		atomic_inc(&nr_task_events);
	if (event->attr.freq)
@@ -11157,6 +11277,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,

	if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
		ret = perf_reg_validate(attr->sample_regs_intr);

#ifndef CONFIG_CGROUP_PERF
	if (attr->sample_type & PERF_SAMPLE_CGROUP)
		return -EINVAL;
#endif

out:
	return ret;

@@ -12754,6 +12880,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
	kfree(jc);
}

/*
 * cgroup subsystem hook: a new perf_event cgroup came online — announce
 * it with a PERF_RECORD_CGROUP side-band event. Always succeeds.
 */
static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;

	perf_event_cgroup(cgrp);

	return 0;
}

static int __perf_cgroup_move(void *info)
{
	struct task_struct *task = info;
@@ -12775,6 +12907,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
struct cgroup_subsys perf_event_cgrp_subsys = {
	.css_alloc	= perf_cgroup_css_alloc,
	.css_free	= perf_cgroup_css_free,
	.css_online	= perf_cgroup_css_online,
	.attach		= perf_cgroup_attach,
	/*
	 * Implicitly enable on dfl hierarchy so that perf events can
+2 −1
Original line number Diff line number Diff line
@@ -72,7 +72,8 @@ FEATURE_TESTS_BASIC := \
        setns				\
        libaio				\
        libzstd				\
        disassembler-four-args
        disassembler-four-args		\
        file-handle

# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
Loading