Commit 138c4ae9 authored by Linus Torvalds
* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  tools, slub: Fix off-by-one buffer corruption after readlink() call
  slub: Discard slab page when node partial > minimum partial number
  slub: correct comments error for per cpu partial
  mm: restrict access to slab files under procfs and sysfs
  slub: Code optimization in get_partial_node()
  slub: doc: update the slabinfo.c file path
  slub: explicitly document position of inserting slab to partial list
  slub: update slabinfo tools to report per cpu partial list statistics
  slub: per cpu cache for partial pages
  slub: return object pointer from get_partial() / new_slab().
  slub: pass kmem_cache_cpu pointer to get_partial()
  slub: Prepare inuse field in new_slab()
  slub: Remove useless statements in __slab_alloc
  slub: free slabs without holding locks
  slub: use print_hex_dump
  slab: use print_hex_dump
parents 3b3dd79d e182a345
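
The centrepiece of this pull is the per cpu cache for partial pages, and the slabinfo tooling gains matching statistics exposed through sysfs. As a quick way to poke at the result, here is a minimal userspace sketch (not part of the series) that reads the cpu_partial and slabs_cpu_partial attributes this merge adds under /sys/kernel/slab/; the kmalloc-64 cache name is only an example default.

/* read_cpu_partial.c - hedged sketch, not part of the patch set.
 * Prints the per cpu partial statistics that this series exposes through
 * sysfs. Any cache directory under /sys/kernel/slab/ works on a SLUB
 * kernel that carries these patches.
 */
#include <stdio.h>

static void show(const char *cache, const char *attr)
{
	char path[256], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/slab/%s/%s", cache, attr);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%-20s %s", attr, line);
	fclose(f);
}

int main(int argc, char **argv)
{
	const char *cache = argc > 1 ? argv[1] : "kmalloc-64";

	show(cache, "cpu_partial");        /* objects kept per cpu */
	show(cache, "slabs_cpu_partial");  /* total(pages) plus per-cpu breakdown */
	return 0;
}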
Documentation/vm/00-INDEX +0 −2
@@ -30,8 +30,6 @@ page_migration
	- description of page migration in NUMA systems.
pagemap.txt
	- pagemap, from the userspace perspective
slabinfo.c
	- source code for a tool to get reports about slabs.
slub.txt
	- a short users guide for SLUB.
unevictable-lru.txt
include/linux/mm_types.h +13 −1
@@ -79,9 +79,21 @@ struct page {
	};

	/* Third double word block */
	union {
		struct list_head lru;	/* Pageout list, eg. active_list
					 * protected by zone->lru_lock !
					 */
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			short int pages;
			short int pobjects;
#endif
		};
	};

	/* Remainder is not double word aligned */
	union {
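
The new union members overlay the lru list head while a page sits on a cpu's partial chain: next links the slabs together, and pages/pobjects on the head page carry cumulative counts for the whole chain. A hedged, purely illustrative userspace model of that convention (fake_page and push_partial are hypothetical names):

/* Hypothetical userspace model of the new fields, for illustration only.
 * The head of the chain carries the cumulative counts for the whole chain,
 * which is the convention put_cpu_partial() establishes further down.
 */
#include <stdio.h>

struct fake_page {
	struct fake_page *next;   /* next partial slab in the per cpu chain */
	int pages;                /* nr of partial slabs from here to the tail */
	int pobjects;             /* approximate nr of free objects in them */
	int objects;              /* capacity of this slab */
	int inuse;                /* allocated objects in this slab */
};

/* Push a freshly frozen slab onto the per cpu chain. */
static void push_partial(struct fake_page **head, struct fake_page *page)
{
	struct fake_page *old = *head;

	page->next = old;
	page->pages = (old ? old->pages : 0) + 1;
	page->pobjects = (old ? old->pobjects : 0) + page->objects - page->inuse;
	*head = page;
}

int main(void)
{
	struct fake_page *head = NULL;
	struct fake_page a = { .objects = 32, .inuse = 31 };
	struct fake_page b = { .objects = 32, .inuse = 20 };

	push_partial(&head, &a);
	push_partial(&head, &b);
	printf("%d slabs, ~%d free objects cached\n", head->pages, head->pobjects);
	return 0;
}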
include/linux/slub_def.h +4 −0
@@ -36,12 +36,15 @@ enum stat_item {
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Used cpu partial on free */
	NR_SLUB_STAT_ITEMS };

struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
	int node;		/* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
	unsigned stat[NR_SLUB_STAT_ITEMS];
@@ -79,6 +82,7 @@ struct kmem_cache {
	int size;		/* The size of an object including meta data */
	int objsize;		/* The size of an object without meta data */
	int offset;		/* Free pointer offset. */
	int cpu_partial;	/* Number of per cpu partial objects to keep around */
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
mm/slab.c +7 −12
@@ -1857,9 +1857,9 @@ static void dump_line(char *data, int offset, int limit)
			error = data[offset + i];
			bad_count++;
		}
		printk(" %02x", (unsigned char)data[offset + i]);
	}
	printk("\n");
	print_hex_dump(KERN_CONT, "", 0, 16, 1,
			&data[offset], limit, 1);

	if (bad_count == 1) {
		error ^= POISON_FREE;
@@ -3039,14 +3039,9 @@ bad:
		printk(KERN_ERR "slab: Internal list corruption detected in "
				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
			cachep->name, cachep->num, slabp, slabp->inuse);
		for (i = 0;
		     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
		     i++) {
			if (i % 16 == 0)
				printk("\n%03x:", i);
			printk(" %02x", ((unsigned char *)slabp)[i]);
		}
		printk("\n");
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
			1);
		BUG();
	}
}
@@ -4584,7 +4579,7 @@ static const struct file_operations proc_slabstats_operations = {

static int __init slab_proc_init(void)
{
	proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
	proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
#ifdef CONFIG_DEBUG_SLAB_LEAK
	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
#endif
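
Both hand-rolled hex dump loops above are replaced by the kernel's print_hex_dump() helper. For readers unfamiliar with its output, here is a rough userspace approximation of the DUMP_PREFIX_OFFSET, 16-bytes-per-row, groupsize-1 form used here; it is an illustration only and may differ from the kernel formatting in small details.

/* Rough userspace stand-in for print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET,
 * 16, 1, buf, len, 1): 16 single bytes per row, offset prefix, ASCII column.
 */
#include <ctype.h>
#include <stdio.h>

static void hex_dump_offset(const unsigned char *buf, size_t len)
{
	size_t i, j;

	for (i = 0; i < len; i += 16) {
		printf("%08zx: ", i);                    /* offset prefix */
		for (j = 0; j < 16; j++) {
			if (i + j < len)
				printf("%02x ", buf[i + j]);
			else
				printf("   ");
		}
		printf(" ");
		for (j = 0; j < 16 && i + j < len; j++)  /* ASCII column */
			putchar(isprint(buf[i + j]) ? buf[i + j] : '.');
		printf("\n");
	}
}

int main(void)
{
	unsigned char data[24];

	for (size_t i = 0; i < sizeof(data); i++)
		data[i] = (i & 1) ? 0x6b : 0xa5;         /* arbitrary test pattern */
	hex_dump_offset(data, sizeof(data));
	return 0;
}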
mm/slub.c +392 −166
@@ -467,34 +467,8 @@ static int disable_higher_order_debug;
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	int i, offset;
	int newline = 1;
	char ascii[17];

	ascii[16] = 0;

	for (i = 0; i < length; i++) {
		if (newline) {
			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
			newline = 0;
		}
		printk(KERN_CONT " %02x", addr[i]);
		offset = i % 16;
		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
		if (offset == 15) {
			printk(KERN_CONT " %s\n", ascii);
			newline = 1;
		}
	}
	if (!newline) {
		i %= 16;
		while (i < 16) {
			printk(KERN_CONT "   ");
			ascii[i] = ' ';
			i++;
		}
		printk(KERN_CONT " %s\n", ascii);
	}
	print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
			length, 1);
}

static struct track *get_track(struct kmem_cache *s, void *object,
@@ -627,8 +601,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
	if (p > addr + 16)
		print_section("Bytes b4 ", p - 16, 16);

	print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));

	print_section("Object ", p, min_t(unsigned long, s->objsize,
				PAGE_SIZE));
	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone ", p + s->objsize,
			s->inuse - s->objsize);
@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
	set_freepointer(s, last, NULL);

	page->freelist = start;
	page->inuse = 0;
	page->inuse = page->objects;
	page->frozen = 1;
out:
	return page;
@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
				struct page *page, int tail)
{
	n->nr_partial++;
	if (tail)
	if (tail == DEACTIVATE_TO_TAIL)
		list_add_tail(&page->lru, &n->partial);
	else
		list_add(&page->lru, &n->partial);
@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
 * Lock slab, remove from the partial list and put the object into the
 * per cpu freelist.
 *
 * Returns a list of objects or NULL if it fails.
 *
 * Must hold list_lock.
 */
static inline int acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page)
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
	void *freelist;
	unsigned long counters;
@@ -1572,6 +1549,7 @@ static inline int acquire_slab(struct kmem_cache *s,
		freelist = page->freelist;
		counters = page->counters;
		new.counters = counters;
		if (mode)
			new.inuse = page->objects;

		VM_BUG_ON(new.frozen);
@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s,
			"lock and freeze"));

	remove_partial(n, page);

	if (freelist) {
		/* Populate the per cpu freelist */
		this_cpu_write(s->cpu_slab->freelist, freelist);
		this_cpu_write(s->cpu_slab->page, page);
		this_cpu_write(s->cpu_slab->node, page_to_nid(page));
		return 1;
	} else {
		/*
		 * Slab page came from the wrong list. No object to allocate
		 * from. Put it onto the correct list and continue partial
		 * scan.
		 */
		printk(KERN_ERR "SLUB: %s : Page without available objects on"
			" partial list\n", s->name);
		return 0;
	}
	return freelist;
}

static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);

/*
 * Try to allocate a partial slab from a specific node.
 */
static struct page *get_partial_node(struct kmem_cache *s,
					struct kmem_cache_node *n)
static void *get_partial_node(struct kmem_cache *s,
		struct kmem_cache_node *n, struct kmem_cache_cpu *c)
{
	struct page *page;
	struct page *page, *page2;
	void *object = NULL;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s,
		return NULL;

	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
		if (acquire_slab(s, n, page))
			goto out;
	page = NULL;
out:
	list_for_each_entry_safe(page, page2, &n->partial, lru) {
		void *t = acquire_slab(s, n, page, object == NULL);
		int available;

		if (!t)
			break;

		if (!object) {
			c->page = page;
			c->node = page_to_nid(page);
			stat(s, ALLOC_FROM_PARTIAL);
			object = t;
			available =  page->objects - page->inuse;
		} else {
			page->freelist = t;
			available = put_cpu_partial(s, page, 0);
		}
		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
			break;

	}
	spin_unlock(&n->list_lock);
	return page;
	return object;
}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
		struct kmem_cache_cpu *c)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	struct page *page;
	void *object;

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)

		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
				n->nr_partial > s->min_partial) {
			page = get_partial_node(s, n);
			if (page) {
			object = get_partial_node(s, n, c);
			if (object) {
				put_mems_allowed();
				return page;
				return object;
			}
		}
	}
@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
/*
 * Get a partial page, lock it and return it.
 */
static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
		struct kmem_cache_cpu *c)
{
	struct page *page;
	void *object;
	int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;

	page = get_partial_node(s, get_node(s, searchnode));
	if (page || node != NUMA_NO_NODE)
		return page;
	object = get_partial_node(s, get_node(s, searchnode), c);
	if (object || node != NUMA_NO_NODE)
		return object;

	return get_any_partial(s, flags);
	return get_any_partial(s, flags, c);
}

#ifdef CONFIG_PREEMPT
@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
	for_each_possible_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
}
/*
 * Remove the cpu slab
 */

/*
 * Remove the cpu slab
@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
	enum slab_modes l = M_NONE, m = M_NONE;
	void *freelist;
	void *nextfree;
	int tail = 0;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = 1;
		tail = DEACTIVATE_TO_TAIL;
	}

	c->tid = next_tid(c->tid);
@@ -1893,7 +1873,7 @@ redo:
		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
			stat(s, tail);

		} else if (m == M_FULL) {

@@ -1920,6 +1900,123 @@ redo:
	}
}

/* Unfreeze all the cpu partial slabs */
static void unfreeze_partials(struct kmem_cache *s)
{
	struct kmem_cache_node *n = NULL;
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
	struct page *page;

	while ((page = c->partial)) {
		enum slab_modes { M_PARTIAL, M_FREE };
		enum slab_modes l, m;
		struct page new;
		struct page old;

		c->partial = page->next;
		l = M_FREE;

		do {

			old.freelist = page->freelist;
			old.counters = page->counters;
			VM_BUG_ON(!old.frozen);

			new.counters = old.counters;
			new.freelist = old.freelist;

			new.frozen = 0;

			if (!new.inuse && (!n || n->nr_partial > s->min_partial))
				m = M_FREE;
			else {
				struct kmem_cache_node *n2 = get_node(s,
							page_to_nid(page));

				m = M_PARTIAL;
				if (n != n2) {
					if (n)
						spin_unlock(&n->list_lock);

					n = n2;
					spin_lock(&n->list_lock);
				}
			}

			if (l != m) {
				if (l == M_PARTIAL)
					remove_partial(n, page);
				else
					add_partial(n, page, 1);

				l = m;
			}

		} while (!cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		if (m == M_FREE) {
			stat(s, DEACTIVATE_EMPTY);
			discard_slab(s, page);
			stat(s, FREE_SLAB);
		}
	}

	if (n)
		spin_unlock(&n->list_lock);
}

/*
 * Put a page that was just frozen (in __slab_free) into a partial page
 * slot if available. This is done without interrupts disabled and without
 * preemption disabled. The cmpxchg is racy and may put the partial page
 * onto a random cpus partial slot.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
	struct page *oldpage;
	int pages;
	int pobjects;

	do {
		pages = 0;
		pobjects = 0;
		oldpage = this_cpu_read(s->cpu_slab->partial);

		if (oldpage) {
			pobjects = oldpage->pobjects;
			pages = oldpage->pages;
			if (drain && pobjects > s->cpu_partial) {
				unsigned long flags;
				/*
				 * partial array is full. Move the existing
				 * set to the per node partial list.
				 */
				local_irq_save(flags);
				unfreeze_partials(s);
				local_irq_restore(flags);
				pobjects = 0;
				pages = 0;
			}
		}

		pages++;
		pobjects += page->objects - page->inuse;

		page->pages = pages;
		page->pobjects = pobjects;
		page->next = oldpage;

	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	stat(s, CPU_PARTIAL_FREE);
	return pobjects;
}
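
put_cpu_partial() publishes the new head with a single this_cpu_cmpxchg(), retrying if it races; as the comment above notes, the push may even land on another cpu's slot, which is tolerated. A hedged C11 analogue of the retry pattern on a plain atomic pointer (node/push are hypothetical names, and per cpu placement is not modelled):

/* Hedged userspace analogue of the lockless publish step: the new head is
 * prepared with updated cumulative counters and swung in with a single
 * compare-and-swap, retrying if another context won the race.
 */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
	int pages;
	int pobjects;
	int free_objects;
};

static _Atomic(struct node *) partial_head;

static void push(struct node *n)
{
	struct node *old;

	do {
		old = atomic_load(&partial_head);
		n->next = old;
		n->pages = (old ? old->pages : 0) + 1;
		n->pobjects = (old ? old->pobjects : 0) + n->free_objects;
	} while (!atomic_compare_exchange_weak(&partial_head, &old, n));
}

int main(void)
{
	struct node a = { .free_objects = 5 }, b = { .free_objects = 12 };

	push(&a);
	push(&b);
	printf("%d pages, %d objects on the list\n",
	       atomic_load(&partial_head)->pages,
	       atomic_load(&partial_head)->pobjects);
	return 0;
}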

static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	stat(s, CPUSLAB_FLUSH);
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

	if (likely(c && c->page))
	if (likely(c)) {
		if (c->page)
			flush_slab(s, c);

		unfreeze_partials(s);
	}
}

static void flush_cpu_slab(void *d)
@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
	}
}

static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
			int node, struct kmem_cache_cpu **pc)
{
	void *object;
	struct kmem_cache_cpu *c;
	struct page *page = new_slab(s, flags, node);

	if (page) {
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
			flush_slab(s, c);

		/*
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
		object = page->freelist;
		page->freelist = NULL;

		stat(s, ALLOC_SLAB);
		c->node = page_to_nid(page);
		c->page = page;
		*pc = c;
	} else
		object = NULL;

	return object;
}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Interrupts are disabled.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	struct page *page;
	unsigned long flags;
	struct page new;
	unsigned long counters;
@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	c = this_cpu_ptr(s->cpu_slab);
#endif

	/* We handle __GFP_ZERO in the caller */
	gfpflags &= ~__GFP_ZERO;

	page = c->page;
	if (!page)
	if (!c->page)
		goto new_slab;

redo:
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, c);
@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	stat(s, ALLOC_SLOWPATH);

	do {
		object = page->freelist;
		counters = page->counters;
		object = c->page->freelist;
		counters = c->page->counters;
		new.counters = counters;
		VM_BUG_ON(!new.frozen);

@@ -2095,15 +2218,15 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
		 * and use them to refill the per cpu queue.
		 */

		new.inuse = page->objects;
		new.inuse = c->page->objects;
		new.frozen = object != NULL;

	} while (!__cmpxchg_double_slab(s, page,
	} while (!__cmpxchg_double_slab(s, c->page,
			object, counters,
			NULL, new.counters,
			"__slab_alloc"));

	if (unlikely(!object)) {
	if (!object) {
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	stat(s, ALLOC_REFILL);

load_freelist:
	VM_BUG_ON(!page->frozen);
	c->freelist = get_freepointer(s, object);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return object;

new_slab:
	page = get_partial(s, gfpflags, node);
	if (page) {
		stat(s, ALLOC_FROM_PARTIAL);
		object = c->freelist;

		if (kmem_cache_debug(s))
			goto debug;
		goto load_freelist;
	if (c->partial) {
		c->page = c->partial;
		c->partial = c->page->next;
		c->node = page_to_nid(c->page);
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	page = new_slab(s, gfpflags, node);
	/* Then do expensive stuff like retrieving pages from the partial lists */
	object = get_partial(s, gfpflags, node, c);

	if (page) {
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
			flush_slab(s, c);

		/*
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
		object = page->freelist;
		page->freelist = NULL;
		page->inuse = page->objects;
	if (unlikely(!object)) {

		stat(s, ALLOC_SLAB);
		c->node = page_to_nid(page);
		c->page = page;
		object = new_slab_objects(s, gfpflags, node, &c);

		if (kmem_cache_debug(s))
			goto debug;
		goto load_freelist;
	}
		if (unlikely(!object)) {
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
				slab_out_of_memory(s, gfpflags, node);

			local_irq_restore(flags);
			return NULL;
		}
	}

debug:
	if (!object || !alloc_debug_processing(s, page, object, addr))
		goto new_slab;
	if (likely(!kmem_cache_debug(s)))
		goto load_freelist;

	/* Only entered in the debug case */
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */

	c->freelist = get_freepointer(s, object);
	deactivate_slab(s, c);
	c->page = NULL;
	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
	return object;
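
After the rework, __slab_alloc() tries its sources in a fixed order: pop a slab from c->partial, then fall back to get_partial() on the node lists, and only then grow the cache through new_slab_objects(). A very loose outline of that ordering, with stub functions standing in for the kernel helpers (it skips the redo/cmpxchg details shown above):

/* Hedged outline of the slow-path ordering; the function names below are
 * stand-ins for the kernel helpers in the hunk above, stubbed out so the
 * control flow compiles on its own.
 */
#include <stdio.h>
#include <stddef.h>

static void *take_cpu_partial(void)  { return NULL; }  /* c->partial pop */
static void *take_node_partial(void) { return "obj"; } /* get_partial()  */
static void *grow_new_slab(void)     { return "obj"; } /* new_slab_objects() */

static void *slow_alloc(void)
{
	void *object;

	object = take_cpu_partial();      /* cheapest: no list_lock at all */
	if (object)
		return object;
	object = take_node_partial();     /* takes the node's list_lock */
	if (object)
		return object;
	return grow_new_slab();           /* last resort: page allocator */
}

int main(void)
{
	printf("got %s\n", slow_alloc() ? "an object" : "nothing (OOM)");
	return 0;
}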
@@ -2333,6 +2445,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		was_frozen = new.frozen;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was on no list before and will be partially empty
				 * We can defer the list move and instead freeze it.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
@@ -2343,6 +2466,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		inuse = new.inuse;

@@ -2352,6 +2477,14 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen)
			put_cpu_partial(s, page, 1);

		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		 */
		if (unlikely(!prior)) {
			remove_full(s, page);
			add_partial(n, page, 1);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}
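
The free path above defers list manipulation where it can: a slab that was full (no prior freelist) and is not already frozen gets frozen in place and later parked on the cpu partial list, so the node list_lock is only taken when list membership really has to change. A hedged toy classifier of those cases (the names and the enum are invented for illustration):

/* Toy model of the free-path decision shown above: booleans stand in for
 * the kernel state examined in __slab_free().
 */
#include <stdbool.h>
#include <stdio.h>

enum free_action { FREEZE_FOR_CPU_PARTIAL, TAKE_LIST_LOCK, LOCKLESS };

static enum free_action classify_free(bool debug, bool was_frozen,
				      bool was_full, bool now_empty)
{
	if (was_frozen)
		return LOCKLESS;                 /* owner cpu handles the slab */
	if (!debug && was_full)
		return FREEZE_FOR_CPU_PARTIAL;   /* defer the list move */
	if (now_empty || was_full)
		return TAKE_LIST_LOCK;           /* list membership changes */
	return LOCKLESS;
}

int main(void)
{
	printf("full slab, debug off -> action %d\n",
	       classify_free(false, false, true, false));
	return 0;
}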
@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
	slab_free_hook(s, x);

redo:

	/*
	 * Determine the currently cpus per cpu slab.
	 * The cpu may change afterward. However that does not matter since
@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node)
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse++;
	page->inuse = 1;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node)
	init_kmem_cache_node(n, kmem_cache_node);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	add_partial(n, page, 0);
	add_partial(n, page, DEACTIVATE_TO_HEAD);
}

static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s,
	 * The larger the object size is, the more pages we want on the partial
	 * list to avoid pounding the page allocator excessively.
	 */
	set_min_partial(s, ilog2(s->size));
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial determines the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just have one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * This setting also determines
	 *
	 * A) The number of objects from per cpu partial slabs dumped to the
	 *    per node list when we reach the limit.
	 * B) The number of objects in cpu partial slabs to extract from the
	 *    per node list when we run out of per cpu objects. We only fetch 50%
	 *    to keep some capacity around for frees.
	 */
	if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;
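
The if/else ladder above sets the default per cpu partial budget from the object size. Restated as a standalone helper so the thresholds are easy to scan; the 4096 literal stands in for PAGE_SIZE (an assumption, it varies by architecture), and the value can still be overridden at runtime through the cpu_partial sysfs attribute added later in the patch.

/* The sizing heuristic above, mirrored as a standalone helper. */
#include <stdio.h>

static unsigned int default_cpu_partial(unsigned long size)
{
	if (size >= 4096)        /* PAGE_SIZE on most configs; an assumption */
		return 2;
	if (size >= 1024)
		return 6;
	if (size >= 256)
		return 13;
	return 30;
}

int main(void)
{
	unsigned long sizes[] = { 64, 192, 512, 2048, 8192 };

	for (int i = 0; i < 5; i++)
		printf("object size %5lu -> cpu_partial %u\n",
		       sizes[i], default_cpu_partial(sizes[i]));
	return 0;
}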

	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,

/*
 * Attempt to free all partial slabs on a node.
 * This is called from kmem_cache_close(). We must be the last thread
 * using the cache and therefore we do not need to lock anymore.
 */
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
	unsigned long flags;
	struct page *page, *h;

	spin_lock_irqsave(&n->list_lock, flags);
	list_for_each_entry_safe(page, h, &n->partial, lru) {
		if (!page->inuse) {
			remove_partial(n, page);
@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
				"Objects remaining on kmem_cache_close()");
		}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
}

/*
@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
	s->refcount--;
	if (!s->refcount) {
		list_del(&s->list);
		up_write(&slub_lock);
		if (kmem_cache_close(s)) {
			printk(KERN_ERR "SLUB %s: %s called for cache that "
				"still has objects.\n", s->name, __func__);
@@ -3028,7 +3187,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
		if (s->flags & SLAB_DESTROY_BY_RCU)
			rcu_barrier();
		sysfs_slab_remove(s);
	}
	} else
		up_write(&slub_lock);
}
EXPORT_SYMBOL(kmem_cache_destroy);
@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
		 * list_lock. page->inuse here is the upper limit.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			if (!page->inuse) {
				remove_partial(n, page);
				discard_slab(s, page);
			} else {
				list_move(&page->lru,
				slabs_by_inuse + page->inuse);
			}
			list_move(&page->lru, slabs_by_inuse + page->inuse);
			if (!page->inuse)
				n->nr_partial--;
		}

		/*
		 * Rebuild the partial list with the slabs filled up most
		 * first and the least used slabs at the end.
		 */
		for (i = objects - 1; i >= 0; i--)
		for (i = objects - 1; i > 0; i--)
			list_splice(slabs_by_inuse + i, n->partial.prev);

		spin_unlock_irqrestore(&n->list_lock, flags);

		/* Release empty slabs */
		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
			discard_slab(s, page);
	}

	kfree(slabs_by_inuse);
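
kmem_cache_shrink() now moves every partial slab into a bucket keyed by its inuse count, splices the buckets back fullest-first, and frees the bucket of completely empty slabs after dropping the list_lock. A toy model of that bucketing (arrays stand in for the kernel's list_heads):

/* Hedged model of the shrink rework above: sort partial slabs by fill
 * level, rebuild the list most-used first, release empty ones afterwards.
 */
#include <stdio.h>

#define OBJECTS 4   /* objects per slab in this toy example */

int main(void)
{
	int inuse[] = { 0, 3, 1, 0, 3, 2 };          /* six partial slabs */
	int buckets[OBJECTS] = { 0 };
	int nslabs = sizeof(inuse) / sizeof(inuse[0]);

	for (int i = 0; i < nslabs; i++)
		buckets[inuse[i]]++;                  /* bucket by fill level */

	printf("rebuilt partial list, fullest first:\n");
	for (int b = OBJECTS - 1; b > 0; b--)
		printf("  %d slab(s) with inuse=%d\n", buckets[b], b);
	printf("released %d empty slab(s)\n", buckets[0]);
	return 0;
}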
@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,

		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
			struct page *page;

			if (!c || c->node < 0)
				continue;
@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
				total += x;
				nodes[c->node] += x;
			}
			page = c->partial;

			if (page) {
				x = page->pobjects;
                                total += x;
                                nodes[c->node] += x;
			}
			per_cpu[c->node]++;
		}
	}
@@ -4412,11 +4579,12 @@ struct slab_attribute {
};

#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)

#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0644, _name##_show, _name##_store)
	__ATTR(_name, 0600, _name##_show, _name##_store)

static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
}
SLAB_ATTR(min_partial);

static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%u\n", s->cpu_partial);
}

static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
				 size_t length)
{
	unsigned long objects;
	int err;

	err = strict_strtoul(buf, 10, &objects);
	if (err)
		return err;

	s->cpu_partial = objects;
	flush_all(s);
	return length;
}
SLAB_ATTR(cpu_partial);

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
	if (!s->ctor)
@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
}
SLAB_ATTR_RO(objects_partial);

static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
	int objects = 0;
	int pages = 0;
	int cpu;
	int len;

	for_each_online_cpu(cpu) {
		struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;

		if (page) {
			pages += page->pages;
			objects += page->pobjects;
		}
	}

	len = sprintf(buf, "%d(%d)", objects, pages);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;

		if (page && len < PAGE_SIZE - 20)
			len += sprintf(buf + len, " C%d=%d(%d)", cpu,
				page->pobjects, page->pages);
	}
#endif
	return len + sprintf(buf + len, "\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
#endif

static struct attribute *slab_attrs[] = {
@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = {
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = {
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = {
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = {

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);