Commit d5ceb62b authored by Jiri Olsa, committed by Arnaldo Carvalho de Melo
Browse files

perf ordered_events: Add 'struct ordered_events_buffer' layer

When ordering events, we use preallocated buffers to store separate
events.  Those buffers currently don't have their own struct, but since
they are basically an array of 'struct ordered_event' objects, we use
the first event to hold the buffer's data - the list head that links all
buffers together:

   struct ordered_events {
     ...
     struct ordered_event *buffer;
     ...
   };

   struct ordered_event {
     u64               timestamp;
     u64               file_offset;
     union perf_event  *event;
     struct list_head  list;
   };

This is quite convoluted and error prone, as demonstrated by the freeing
issue discovered and fixed by Stephane here [1].

This patch adds the 'struct ordered_events_buffer' object, that holds
the buffer data and frees it up properly.

[1] - https://marc.info/?l=linux-kernel&m=153376761329335&w=2



Reported-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Stephane Eranian <eranian@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180907102455.7030-1-jolsa@kernel.org


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 2e85d597
Loading
Loading
Loading
Loading
+70 −13
Original line number Diff line number Diff line
@@ -80,14 +80,20 @@ static union perf_event *dup_event(struct ordered_events *oe,
	return oe->copy_on_queue ? __dup_event(oe, event) : event;
}

static void free_dup_event(struct ordered_events *oe, union perf_event *event)
static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
{
	if (event && oe->copy_on_queue) {
	if (event) {
		oe->cur_alloc_size -= event->header.size;
		free(event);
	}
}

/*
 * Release 'event' through __free_dup_event(), but only when
 * oe->copy_on_queue is set; otherwise this call does nothing.
 */
static void free_dup_event(struct ordered_events *oe, union perf_event *event)
{
	if (!oe->copy_on_queue)
		return;

	__free_dup_event(oe, event);
}

/* Number of 'struct ordered_event' objects that fit into 64KB. */
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
static struct ordered_event *alloc_event(struct ordered_events *oe,
					 union perf_event *event)
@@ -100,15 +106,43 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
	if (!new_event)
		return NULL;

	/*
	 * We maintain the following scheme of buffers for ordered
	 * event allocation:
	 *
	 *   to_free list -> buffer1 (64K)
	 *                   buffer2 (64K)
	 *                   ...
	 *
	 * Each buffer keeps an array of ordered events objects:
	 *    buffer -> event[0]
	 *              event[1]
	 *              ...
	 *
	 * Each allocated ordered event is linked to one of
	 * following lists:
	 *   - time ordered list 'events'
	 *   - list of currently removed events 'cache'
	 *
	 * Allocation of the ordered event uses the following order
	 * to get the memory:
	 *   - use recently removed object from 'cache' list
	 *   - use available object in current allocation buffer
	 *   - allocate new buffer if the current buffer is full
	 *
	 * Removal of ordered event object moves it from events to
	 * the cache list.
	 */
	if (!list_empty(cache)) {
		new = list_entry(cache->next, struct ordered_event, list);
		list_del(&new->list);
	} else if (oe->buffer) {
		new = oe->buffer + oe->buffer_idx;
		new = &oe->buffer->event[oe->buffer_idx];
		if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
			oe->buffer = NULL;
	} else if (oe->cur_alloc_size < oe->max_alloc_size) {
		size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
		size_t size = sizeof(*oe->buffer) +
			      MAX_SAMPLE_BUFFER * sizeof(*new);

		oe->buffer = malloc(size);
		if (!oe->buffer) {
@@ -122,11 +156,11 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
		oe->cur_alloc_size += size;
		list_add(&oe->buffer->list, &oe->to_free);

		/* First entry is abused to maintain the to_free list. */
		oe->buffer_idx = 2;
		new = oe->buffer + 1;
		oe->buffer_idx = 1;
		new = &oe->buffer->event[0];
	} else {
		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
		return NULL;
	}

	new->event = new_event;
@@ -300,15 +334,38 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d
	oe->deliver	   = deliver;
}

static void
ordered_events_buffer__free(struct ordered_events_buffer *buffer,
			    unsigned int max, struct ordered_events *oe)
{
	if (oe->copy_on_queue) {
		unsigned int i;

		for (i = 0; i < max; i++)
			__free_dup_event(oe, buffer->event[i].event);
	}

	free(buffer);
}

/*
 * Release every allocation buffer linked on oe->to_free, together with
 * the event copies they reference when oe->copy_on_queue is set.
 * Does nothing when no buffer was ever allocated.
 */
void ordered_events__free(struct ordered_events *oe)
{
	struct ordered_events_buffer *buffer, *tmp;

	if (list_empty(&oe->to_free))
		return;

	/*
	 * Current buffer might not have all the events allocated
	 * yet, we need to free only allocated ones ...
	 *
	 * alloc_event() resets oe->buffer to NULL as soon as the current
	 * buffer is fully used, so there may be no partially filled
	 * buffer at all; every buffer on to_free is then complete and is
	 * handled by the loop below.
	 */
	if (oe->buffer) {
		list_del(&oe->buffer->list);
		ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
	}

	/* ... and continue with the rest */
	list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
		list_del(&buffer->list);
		ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
	}
}

+21 −16
Original line number Diff line number Diff line
@@ -25,6 +25,11 @@ struct ordered_events;
/*
 * Callback type kept in the 'deliver' member of 'struct ordered_events'.
 * It receives the ordered_events instance and one ordered_event and
 * returns an int (presumably 0 on success and non-zero on error -
 * confirm against the callers).
 */
typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
					 struct ordered_event *event);

/*
 * One allocation unit of ordered events.
 *
 * @list:  links the buffer into the 'to_free' list of
 *         'struct ordered_events'
 * @event: array of ordered_event slots that trails the header within
 *         the same allocation
 */
struct ordered_events_buffer {
	struct list_head	list;
	/* C99 flexible array member; sizeof() covers only the header. */
	struct ordered_event	event[];
};

struct ordered_events {
	u64				 last_flush;
	u64				 next_flush;
@@ -34,7 +39,7 @@ struct ordered_events {
	struct list_head		 events;
	struct list_head		 cache;
	struct list_head		 to_free;
	struct ordered_event	*buffer;
	struct ordered_events_buffer	*buffer;
	struct ordered_event		*last;
	ordered_events__deliver_t	 deliver;
	int				 buffer_idx;