Commit 138c4ae9 authored by Linus Torvalds
Browse files
* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  tools, slub: Fix off-by-one buffer corruption after readlink() call
  slub: Discard slab page when node partial > minimum partial number
  slub: correct comments error for per cpu partial
  mm: restrict access to slab files under procfs and sysfs
  slub: Code optimization in get_partial_node()
  slub: doc: update the slabinfo.c file path
  slub: explicitly document position of inserting slab to partial list
  slub: update slabinfo tools to report per cpu partial list statistics
  slub: per cpu cache for partial pages
  slub: return object pointer from get_partial() / new_slab().
  slub: pass kmem_cache_cpu pointer to get_partial()
  slub: Prepare inuse field in new_slab()
  slub: Remove useless statements in __slab_alloc
  slub: free slabs without holding locks
  slub: use print_hex_dump
  slab: use print_hex_dump
parents 3b3dd79d e182a345
Loading
Loading
Loading
Loading
+0 −2
Original line number Original line Diff line number Diff line
@@ -30,8 +30,6 @@ page_migration
	- description of page migration in NUMA systems.
	- description of page migration in NUMA systems.
pagemap.txt
pagemap.txt
	- pagemap, from the userspace perspective
	- pagemap, from the userspace perspective
slabinfo.c
	- source code for a tool to get reports about slabs.
slub.txt
slub.txt
	- a short users guide for SLUB.
	- a short users guide for SLUB.
unevictable-lru.txt
unevictable-lru.txt
+13 −1
Original line number Original line Diff line number Diff line
@@ -79,9 +79,21 @@ struct page {
	};
	};


	/* Third double word block */
	/* Third double word block */
	union {
		struct list_head lru;	/* Pageout list, eg. active_list
		struct list_head lru;	/* Pageout list, eg. active_list
					 * protected by zone->lru_lock !
					 * protected by zone->lru_lock !
					 */
					 */
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			short int pages;
			short int pobjects;
#endif
		};
	};


	/* Remainder is not double word aligned */
	/* Remainder is not double word aligned */
	union {
	union {
+4 −0
Original line number Original line Diff line number Diff line
@@ -36,12 +36,15 @@ enum stat_item {
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Used cpu partial on free */
	NR_SLUB_STAT_ITEMS };
	NR_SLUB_STAT_ITEMS };


struct kmem_cache_cpu {
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
	int node;		/* The node of the page (or -1 for debug) */
	int node;		/* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
#ifdef CONFIG_SLUB_STATS
	unsigned stat[NR_SLUB_STAT_ITEMS];
	unsigned stat[NR_SLUB_STAT_ITEMS];
@@ -79,6 +82,7 @@ struct kmem_cache {
	int size;		/* The size of an object including meta data */
	int size;		/* The size of an object including meta data */
	int objsize;		/* The size of an object without meta data */
	int objsize;		/* The size of an object without meta data */
	int offset;		/* Free pointer offset. */
	int offset;		/* Free pointer offset. */
	int cpu_partial;	/* Number of per cpu partial objects to keep around */
	struct kmem_cache_order_objects oo;
	struct kmem_cache_order_objects oo;


	/* Allocation and freeing of slabs */
	/* Allocation and freeing of slabs */
+7 −12
Original line number Original line Diff line number Diff line
@@ -1857,9 +1857,9 @@ static void dump_line(char *data, int offset, int limit)
			error = data[offset + i];
			error = data[offset + i];
			bad_count++;
			bad_count++;
		}
		}
		printk(" %02x", (unsigned char)data[offset + i]);
	}
	}
	printk("\n");
	print_hex_dump(KERN_CONT, "", 0, 16, 1,
			&data[offset], limit, 1);


	if (bad_count == 1) {
	if (bad_count == 1) {
		error ^= POISON_FREE;
		error ^= POISON_FREE;
@@ -3039,14 +3039,9 @@ bad:
		printk(KERN_ERR "slab: Internal list corruption detected in "
		printk(KERN_ERR "slab: Internal list corruption detected in "
				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
			cachep->name, cachep->num, slabp, slabp->inuse);
			cachep->name, cachep->num, slabp, slabp->inuse);
		for (i = 0;
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
		     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
		     i++) {
			1);
			if (i % 16 == 0)
				printk("\n%03x:", i);
			printk(" %02x", ((unsigned char *)slabp)[i]);
		}
		printk("\n");
		BUG();
		BUG();
	}
	}
}
}
@@ -4584,7 +4579,7 @@ static const struct file_operations proc_slabstats_operations = {


static int __init slab_proc_init(void)
static int __init slab_proc_init(void)
{
{
	proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
	proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
#ifdef CONFIG_DEBUG_SLAB_LEAK
#ifdef CONFIG_DEBUG_SLAB_LEAK
	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
#endif
#endif
+392 −166
Original line number Original line Diff line number Diff line
@@ -467,34 +467,8 @@ static int disable_higher_order_debug;
 */
 */
static void print_section(char *text, u8 *addr, unsigned int length)
static void print_section(char *text, u8 *addr, unsigned int length)
{
{
	int i, offset;
	print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
	int newline = 1;
			length, 1);
	char ascii[17];

	ascii[16] = 0;

	for (i = 0; i < length; i++) {
		if (newline) {
			printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
			newline = 0;
		}
		printk(KERN_CONT " %02x", addr[i]);
		offset = i % 16;
		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
		if (offset == 15) {
			printk(KERN_CONT " %s\n", ascii);
			newline = 1;
		}
	}
	if (!newline) {
		i %= 16;
		while (i < 16) {
			printk(KERN_CONT "   ");
			ascii[i] = ' ';
			i++;
		}
		printk(KERN_CONT " %s\n", ascii);
	}
}
}


static struct track *get_track(struct kmem_cache *s, void *object,
static struct track *get_track(struct kmem_cache *s, void *object,
@@ -627,8 +601,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
	if (p > addr + 16)
	if (p > addr + 16)
		print_section("Bytes b4 ", p - 16, 16);
		print_section("Bytes b4 ", p - 16, 16);


	print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
	print_section("Object ", p, min_t(unsigned long, s->objsize,

				PAGE_SIZE));
	if (s->flags & SLAB_RED_ZONE)
	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone ", p + s->objsize,
		print_section("Redzone ", p + s->objsize,
			s->inuse - s->objsize);
			s->inuse - s->objsize);
@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
	set_freepointer(s, last, NULL);
	set_freepointer(s, last, NULL);


	page->freelist = start;
	page->freelist = start;
	page->inuse = 0;
	page->inuse = page->objects;
	page->frozen = 1;
	page->frozen = 1;
out:
out:
	return page;
	return page;
@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
				struct page *page, int tail)
				struct page *page, int tail)
{
{
	n->nr_partial++;
	n->nr_partial++;
	if (tail)
	if (tail == DEACTIVATE_TO_TAIL)
		list_add_tail(&page->lru, &n->partial);
		list_add_tail(&page->lru, &n->partial);
	else
	else
		list_add(&page->lru, &n->partial);
		list_add(&page->lru, &n->partial);
@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
 * Lock slab, remove from the partial list and put the object into the
 * Lock slab, remove from the partial list and put the object into the
 * per cpu freelist.
 * per cpu freelist.
 *
 *
 * Returns a list of objects or NULL if it fails.
 *
 * Must hold list_lock.
 * Must hold list_lock.
 */
 */
static inline int acquire_slab(struct kmem_cache *s,
static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page)
		struct kmem_cache_node *n, struct page *page,
		int mode)
{
{
	void *freelist;
	void *freelist;
	unsigned long counters;
	unsigned long counters;
@@ -1572,6 +1549,7 @@ static inline int acquire_slab(struct kmem_cache *s,
		freelist = page->freelist;
		freelist = page->freelist;
		counters = page->counters;
		counters = page->counters;
		new.counters = counters;
		new.counters = counters;
		if (mode)
			new.inuse = page->objects;
			new.inuse = page->objects;


		VM_BUG_ON(new.frozen);
		VM_BUG_ON(new.frozen);
@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s,
			"lock and freeze"));
			"lock and freeze"));


	remove_partial(n, page);
	remove_partial(n, page);

	return freelist;
	if (freelist) {
		/* Populate the per cpu freelist */
		this_cpu_write(s->cpu_slab->freelist, freelist);
		this_cpu_write(s->cpu_slab->page, page);
		this_cpu_write(s->cpu_slab->node, page_to_nid(page));
		return 1;
	} else {
		/*
		 * Slab page came from the wrong list. No object to allocate
		 * from. Put it onto the correct list and continue partial
		 * scan.
		 */
		printk(KERN_ERR "SLUB: %s : Page without available objects on"
			" partial list\n", s->name);
		return 0;
	}
}
}


static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);

/*
/*
 * Try to allocate a partial slab from a specific node.
 * Try to allocate a partial slab from a specific node.
 */
 */
static struct page *get_partial_node(struct kmem_cache *s,
static void *get_partial_node(struct kmem_cache *s,
					struct kmem_cache_node *n)
		struct kmem_cache_node *n, struct kmem_cache_cpu *c)
{
{
	struct page *page;
	struct page *page, *page2;
	void *object = NULL;


	/*
	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * Racy check. If we mistakenly see no partial slabs then we
@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s,
		return NULL;
		return NULL;


	spin_lock(&n->list_lock);
	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
	list_for_each_entry_safe(page, page2, &n->partial, lru) {
		if (acquire_slab(s, n, page))
		void *t = acquire_slab(s, n, page, object == NULL);
			goto out;
		int available;
	page = NULL;

out:
		if (!t)
			break;

		if (!object) {
			c->page = page;
			c->node = page_to_nid(page);
			stat(s, ALLOC_FROM_PARTIAL);
			object = t;
			available =  page->objects - page->inuse;
		} else {
			page->freelist = t;
			available = put_cpu_partial(s, page, 0);
		}
		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
			break;

	}
	spin_unlock(&n->list_lock);
	spin_unlock(&n->list_lock);
	return page;
	return object;
}
}


/*
/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
 */
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
		struct kmem_cache_cpu *c)
{
{
#ifdef CONFIG_NUMA
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zonelist *zonelist;
	struct zoneref *z;
	struct zoneref *z;
	struct zone *zone;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	enum zone_type high_zoneidx = gfp_zone(flags);
	struct page *page;
	void *object;


	/*
	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)


		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
				n->nr_partial > s->min_partial) {
				n->nr_partial > s->min_partial) {
			page = get_partial_node(s, n);
			object = get_partial_node(s, n, c);
			if (page) {
			if (object) {
				put_mems_allowed();
				put_mems_allowed();
				return page;
				return object;
			}
			}
		}
		}
	}
	}
@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
/*
/*
 * Get a partial page, lock it and return it.
 * Get a partial page, lock it and return it.
 */
 */
static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
		struct kmem_cache_cpu *c)
{
{
	struct page *page;
	void *object;
	int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
	int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;


	page = get_partial_node(s, get_node(s, searchnode));
	object = get_partial_node(s, get_node(s, searchnode), c);
	if (page || node != NUMA_NO_NODE)
	if (object || node != NUMA_NO_NODE)
		return page;
		return object;


	return get_any_partial(s, flags);
	return get_any_partial(s, flags, c);
}
}


#ifdef CONFIG_PREEMPT
#ifdef CONFIG_PREEMPT
@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
	for_each_possible_cpu(cpu)
	for_each_possible_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
}
}
/*
 * Remove the cpu slab
 */


/*
/*
 * Remove the cpu slab
 * Remove the cpu slab
@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
	enum slab_modes l = M_NONE, m = M_NONE;
	enum slab_modes l = M_NONE, m = M_NONE;
	void *freelist;
	void *freelist;
	void *nextfree;
	void *nextfree;
	int tail = 0;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page new;
	struct page old;
	struct page old;


	if (page->freelist) {
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = 1;
		tail = DEACTIVATE_TO_TAIL;
	}
	}


	c->tid = next_tid(c->tid);
	c->tid = next_tid(c->tid);
@@ -1893,7 +1873,7 @@ redo:
		if (m == M_PARTIAL) {
		if (m == M_PARTIAL) {


			add_partial(n, page, tail);
			add_partial(n, page, tail);
			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
			stat(s, tail);


		} else if (m == M_FULL) {
		} else if (m == M_FULL) {


@@ -1920,6 +1900,123 @@ redo:
	}
	}
}
}


/* Unfreeze all the cpu partial slabs */
static void unfreeze_partials(struct kmem_cache *s)
{
	struct kmem_cache_node *n = NULL;
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
	struct page *page;

	while ((page = c->partial)) {
		enum slab_modes { M_PARTIAL, M_FREE };
		enum slab_modes l, m;
		struct page new;
		struct page old;

		c->partial = page->next;
		l = M_FREE;

		do {

			old.freelist = page->freelist;
			old.counters = page->counters;
			VM_BUG_ON(!old.frozen);

			new.counters = old.counters;
			new.freelist = old.freelist;

			new.frozen = 0;

			if (!new.inuse && (!n || n->nr_partial > s->min_partial))
				m = M_FREE;
			else {
				struct kmem_cache_node *n2 = get_node(s,
							page_to_nid(page));

				m = M_PARTIAL;
				if (n != n2) {
					if (n)
						spin_unlock(&n->list_lock);

					n = n2;
					spin_lock(&n->list_lock);
				}
			}

			if (l != m) {
				if (l == M_PARTIAL)
					remove_partial(n, page);
				else
					add_partial(n, page, 1);

				l = m;
			}

		} while (!cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		if (m == M_FREE) {
			stat(s, DEACTIVATE_EMPTY);
			discard_slab(s, page);
			stat(s, FREE_SLAB);
		}
	}

	if (n)
		spin_unlock(&n->list_lock);
}

/*
 * Put a page that was just frozen (in __slab_free) into a partial page
 * slot if available. This is done without interrupts disabled and without
 * preemption disabled. The cmpxchg is racy and may put the partial page
 * onto a random cpus partial slot.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
	struct page *oldpage;
	int pages;
	int pobjects;

	do {
		pages = 0;
		pobjects = 0;
		oldpage = this_cpu_read(s->cpu_slab->partial);

		if (oldpage) {
			pobjects = oldpage->pobjects;
			pages = oldpage->pages;
			if (drain && pobjects > s->cpu_partial) {
				unsigned long flags;
				/*
				 * partial array is full. Move the existing
				 * set to the per node partial list.
				 */
				local_irq_save(flags);
				unfreeze_partials(s);
				local_irq_restore(flags);
				pobjects = 0;
				pages = 0;
			}
		}

		pages++;
		pobjects += page->objects - page->inuse;

		page->pages = pages;
		page->pobjects = pobjects;
		page->next = oldpage;

	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	stat(s, CPU_PARTIAL_FREE);
	return pobjects;
}

static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
{
	stat(s, CPUSLAB_FLUSH);
	stat(s, CPUSLAB_FLUSH);
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
{
	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);


	if (likely(c && c->page))
	if (likely(c)) {
		if (c->page)
			flush_slab(s, c);
			flush_slab(s, c);

		unfreeze_partials(s);
	}
}
}


static void flush_cpu_slab(void *d)
static void flush_cpu_slab(void *d)
@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
	}
	}
}
}


static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
			int node, struct kmem_cache_cpu **pc)
{
	void *object;
	struct kmem_cache_cpu *c;
	struct page *page = new_slab(s, flags, node);

	if (page) {
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
			flush_slab(s, c);

		/*
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
		object = page->freelist;
		page->freelist = NULL;

		stat(s, ALLOC_SLAB);
		c->node = page_to_nid(page);
		c->page = page;
		*pc = c;
	} else
		object = NULL;

	return object;
}

/*
/*
 * Slow path. The lockless freelist is empty or we need to perform
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 * debugging duties.
 *
 *
 * Interrupts are disabled.
 *
 * Processing is still very fast if new objects have been freed to the
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 * as the lockless freelist and zap the regular freelist.
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
			  unsigned long addr, struct kmem_cache_cpu *c)
{
{
	void **object;
	void **object;
	struct page *page;
	unsigned long flags;
	unsigned long flags;
	struct page new;
	struct page new;
	unsigned long counters;
	unsigned long counters;
@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	c = this_cpu_ptr(s->cpu_slab);
	c = this_cpu_ptr(s->cpu_slab);
#endif
#endif


	/* We handle __GFP_ZERO in the caller */
	if (!c->page)
	gfpflags &= ~__GFP_ZERO;

	page = c->page;
	if (!page)
		goto new_slab;
		goto new_slab;

redo:
	if (unlikely(!node_match(c, node))) {
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, c);
		deactivate_slab(s, c);
@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	stat(s, ALLOC_SLOWPATH);
	stat(s, ALLOC_SLOWPATH);


	do {
	do {
		object = page->freelist;
		object = c->page->freelist;
		counters = page->counters;
		counters = c->page->counters;
		new.counters = counters;
		new.counters = counters;
		VM_BUG_ON(!new.frozen);
		VM_BUG_ON(!new.frozen);


@@ -2095,15 +2218,15 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
		 * and use them to refill the per cpu queue.
		 * and use them to refill the per cpu queue.
		 */
		 */


		new.inuse = page->objects;
		new.inuse = c->page->objects;
		new.frozen = object != NULL;
		new.frozen = object != NULL;


	} while (!__cmpxchg_double_slab(s, page,
	} while (!__cmpxchg_double_slab(s, c->page,
			object, counters,
			object, counters,
			NULL, new.counters,
			NULL, new.counters,
			"__slab_alloc"));
			"__slab_alloc"));


	if (unlikely(!object)) {
	if (!object) {
		c->page = NULL;
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
		goto new_slab;
@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	stat(s, ALLOC_REFILL);
	stat(s, ALLOC_REFILL);


load_freelist:
load_freelist:
	VM_BUG_ON(!page->frozen);
	c->freelist = get_freepointer(s, object);
	c->freelist = get_freepointer(s, object);
	c->tid = next_tid(c->tid);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	local_irq_restore(flags);
	return object;
	return object;


new_slab:
new_slab:
	page = get_partial(s, gfpflags, node);
	if (page) {
		stat(s, ALLOC_FROM_PARTIAL);
		object = c->freelist;


		if (kmem_cache_debug(s))
	if (c->partial) {
			goto debug;
		c->page = c->partial;
		goto load_freelist;
		c->partial = c->page->next;
		c->node = page_to_nid(c->page);
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}
	}


	page = new_slab(s, gfpflags, node);
	/* Then do expensive stuff like retrieving pages from the partial lists */
	object = get_partial(s, gfpflags, node, c);


	if (page) {
	if (unlikely(!object)) {
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
			flush_slab(s, c);

		/*
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
		object = page->freelist;
		page->freelist = NULL;
		page->inuse = page->objects;


		stat(s, ALLOC_SLAB);
		object = new_slab_objects(s, gfpflags, node, &c);
		c->node = page_to_nid(page);
		c->page = page;


		if (kmem_cache_debug(s))
		if (unlikely(!object)) {
			goto debug;
		goto load_freelist;
	}
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
				slab_out_of_memory(s, gfpflags, node);
				slab_out_of_memory(s, gfpflags, node);

			local_irq_restore(flags);
			local_irq_restore(flags);
			return NULL;
			return NULL;
		}
	}


debug:
	if (likely(!kmem_cache_debug(s)))
	if (!object || !alloc_debug_processing(s, page, object, addr))
		goto load_freelist;
		goto new_slab;

	/* Only entered in the debug case */
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */


	c->freelist = get_freepointer(s, object);
	c->freelist = get_freepointer(s, object);
	deactivate_slab(s, c);
	deactivate_slab(s, c);
	c->page = NULL;
	c->node = NUMA_NO_NODE;
	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
	local_irq_restore(flags);
	return object;
	return object;
@@ -2333,6 +2445,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		was_frozen = new.frozen;
		was_frozen = new.frozen;
		new.inuse--;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen && !n) {
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was on no list before and will be partially empty
				 * We can defer the list move and instead freeze it.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
	                        n = get_node(s, page_to_nid(page));
				/*
				/*
				 * Speculatively acquire the list_lock.
				 * Speculatively acquire the list_lock.
@@ -2343,6 +2466,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
				 * other processors updating the list of slabs.
				 * other processors updating the list of slabs.
				 */
				 */
				spin_lock_irqsave(&n->list_lock, flags);
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		}
		inuse = new.inuse;
		inuse = new.inuse;


@@ -2352,6 +2477,14 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		"__slab_free"));
		"__slab_free"));


	if (likely(!n)) {
	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen)
			put_cpu_partial(s, page, 1);

		/*
		/*
		 * The list lock was not taken therefore no list
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 * activity can be necessary.
@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
		 */
		 */
		if (unlikely(!prior)) {
		if (unlikely(!prior)) {
			remove_full(s, page);
			remove_full(s, page);
			add_partial(n, page, 1);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
			stat(s, FREE_ADD_PARTIAL);
		}
		}
	}
	}
@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
	slab_free_hook(s, x);
	slab_free_hook(s, x);


redo:
redo:

	/*
	/*
	 * Determine the currently cpus per cpu slab.
	 * Determine the currently cpus per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * The cpu may change afterward. However that does not matter since
@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node)
	n = page->freelist;
	n = page->freelist;
	BUG_ON(!n);
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse++;
	page->inuse = 1;
	page->frozen = 0;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG
@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node)
	init_kmem_cache_node(n, kmem_cache_node);
	init_kmem_cache_node(n, kmem_cache_node);
	inc_slabs_node(kmem_cache_node, node, page->objects);
	inc_slabs_node(kmem_cache_node, node, page->objects);


	add_partial(n, page, 0);
	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
}


static void free_kmem_cache_nodes(struct kmem_cache *s)
static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s,
	 * The larger the object size is, the more pages we want on the partial
	 * The larger the object size is, the more pages we want on the partial
	 * list to avoid pounding the page allocator excessively.
	 * list to avoid pounding the page allocator excessively.
	 */
	 */
	set_min_partial(s, ilog2(s->size));
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial determined the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just have one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * This setting also determines
	 *
	 * A) The number of objects from per cpu partial slabs dumped to the
	 *    per node list when we reach the limit.
	 * B) The number of objects in cpu partial slabs to extract from the
	 *    per node list when we run out of per cpu objects. We only fetch 50%
	 *    to keep some capacity around for frees.
	 */
	if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

	s->refcount = 1;
	s->refcount = 1;
#ifdef CONFIG_NUMA
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
	s->remote_node_defrag_ratio = 1000;
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,


/*
/*
 * Attempt to free all partial slabs on a node.
 * Attempt to free all partial slabs on a node.
 * This is called from kmem_cache_close(). We must be the last thread
 * using the cache and therefore we do not need to lock anymore.
 */
 */
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
{
	unsigned long flags;
	struct page *page, *h;
	struct page *page, *h;


	spin_lock_irqsave(&n->list_lock, flags);
	list_for_each_entry_safe(page, h, &n->partial, lru) {
	list_for_each_entry_safe(page, h, &n->partial, lru) {
		if (!page->inuse) {
		if (!page->inuse) {
			remove_partial(n, page);
			remove_partial(n, page);
@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
				"Objects remaining on kmem_cache_close()");
				"Objects remaining on kmem_cache_close()");
		}
		}
	}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
}
}


/*
/*
@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
	s->refcount--;
	s->refcount--;
	if (!s->refcount) {
	if (!s->refcount) {
		list_del(&s->list);
		list_del(&s->list);
		up_write(&slub_lock);
		if (kmem_cache_close(s)) {
		if (kmem_cache_close(s)) {
			printk(KERN_ERR "SLUB %s: %s called for cache that "
			printk(KERN_ERR "SLUB %s: %s called for cache that "
				"still has objects.\n", s->name, __func__);
				"still has objects.\n", s->name, __func__);
@@ -3028,7 +3187,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
		if (s->flags & SLAB_DESTROY_BY_RCU)
		if (s->flags & SLAB_DESTROY_BY_RCU)
			rcu_barrier();
			rcu_barrier();
		sysfs_slab_remove(s);
		sysfs_slab_remove(s);
	}
	} else
		up_write(&slub_lock);
		up_write(&slub_lock);
}
}
EXPORT_SYMBOL(kmem_cache_destroy);
EXPORT_SYMBOL(kmem_cache_destroy);
@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
		 * list_lock. page->inuse here is the upper limit.
		 * list_lock. page->inuse here is the upper limit.
		 */
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			if (!page->inuse) {
			list_move(&page->lru, slabs_by_inuse + page->inuse);
				remove_partial(n, page);
			if (!page->inuse)
				discard_slab(s, page);
				n->nr_partial--;
			} else {
				list_move(&page->lru,
				slabs_by_inuse + page->inuse);
			}
		}
		}


		/*
		/*
		 * Rebuild the partial list with the slabs filled up most
		 * Rebuild the partial list with the slabs filled up most
		 * first and the least used slabs at the end.
		 * first and the least used slabs at the end.
		 */
		 */
		for (i = objects - 1; i >= 0; i--)
		for (i = objects - 1; i > 0; i--)
			list_splice(slabs_by_inuse + i, n->partial.prev);
			list_splice(slabs_by_inuse + i, n->partial.prev);


		spin_unlock_irqrestore(&n->list_lock, flags);
		spin_unlock_irqrestore(&n->list_lock, flags);

		/* Release empty slabs */
		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
			discard_slab(s, page);
	}
	}


	kfree(slabs_by_inuse);
	kfree(slabs_by_inuse);
@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,


		for_each_possible_cpu(cpu) {
		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
			struct page *page;


			if (!c || c->node < 0)
			if (!c || c->node < 0)
				continue;
				continue;
@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
				total += x;
				total += x;
				nodes[c->node] += x;
				nodes[c->node] += x;
			}
			}
			page = c->partial;

			if (page) {
				x = page->pobjects;
                                total += x;
                                nodes[c->node] += x;
			}
			per_cpu[c->node]++;
			per_cpu[c->node]++;
		}
		}
	}
	}
@@ -4412,11 +4579,12 @@ struct slab_attribute {
};
};


#define SLAB_ATTR_RO(_name) \
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)


#define SLAB_ATTR(_name) \
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0644, _name##_show, _name##_store)
	__ATTR(_name, 0600, _name##_show, _name##_store)


static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
{
@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
}
}
SLAB_ATTR(min_partial);
SLAB_ATTR(min_partial);


static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%u\n", s->cpu_partial);
}

static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
				 size_t length)
{
	unsigned long objects;
	int err;

	err = strict_strtoul(buf, 10, &objects);
	if (err)
		return err;

	s->cpu_partial = objects;
	flush_all(s);
	return length;
}
SLAB_ATTR(cpu_partial);

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
{
	if (!s->ctor)
	if (!s->ctor)
@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
}
}
SLAB_ATTR_RO(objects_partial);
SLAB_ATTR_RO(objects_partial);


static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
	int objects = 0;
	int pages = 0;
	int cpu;
	int len;

	for_each_online_cpu(cpu) {
		struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;

		if (page) {
			pages += page->pages;
			objects += page->pobjects;
		}
	}

	len = sprintf(buf, "%d(%d)", objects, pages);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;

		if (page && len < PAGE_SIZE - 20)
			len += sprintf(buf + len, " C%d=%d(%d)", cpu,
				page->pobjects, page->pages);
	}
#endif
	return len + sprintf(buf + len, "\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
#endif
#endif


static struct attribute *slab_attrs[] = {
static struct attribute *slab_attrs[] = {
@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = {
	&objs_per_slab_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&partial_attr.attr,
@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = {
	&destroy_by_rcu_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&slabs_attr.attr,
@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = {
	&order_fallback_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
#endif
#endif
#ifdef CONFIG_FAILSLAB
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
	&failslab_attr.attr,
@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = {


static int __init slab_proc_init(void)
static int __init slab_proc_init(void)
{
{
	proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
	return 0;
	return 0;
}
}
module_init(slab_proc_init);
module_init(slab_proc_init);
Loading