Commit 9855609b authored by Roman Gushchin, committed by Linus Torvalds

mm: memcg/slab: use a single set of kmem_caches for all accounted allocations



This is a fairly big but mostly red patch, which makes all accounted slab
allocations use a single set of kmem_caches instead of creating a separate
set for each memory cgroup.

Because the number of non-root kmem_caches is now capped by the number of
root kmem_caches, there is no need to shrink or destroy them prematurely.
They can simply be destroyed together with their root counterparts.
This allows the management of non-root kmem_caches to be dramatically
simplified and a ton of code to be deleted.

This patch performs the following changes:
1) introduces a memcg_params.memcg_cache pointer to represent the
   kmem_cache which will be used for all non-root allocations
2) reuses the existing memcg kmem_cache creation mechanism
   to create the memcg kmem_cache on the first allocation attempt
3) memcg kmem_caches are named <kmemcache_name>-memcg,
   e.g. dentry-memcg
4) simplifies memcg_kmem_get_cache() to just return the memcg kmem_cache
   or schedule its creation and return the root cache
5) removes almost all non-root kmem_cache management code
   (separate refcounter, reparenting, shrinking, etc)
6) makes slab debugfs display the root_mem_cgroup css id and never
   show the :dead and :deact flags in the memcg_slabinfo attribute.

Subsequent patches in the series will simplify kmem_cache creation.
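As a reading aid, here is a condensed sketch of the lookup path after this
patch. It is paraphrased from the memcg_kmem_get_cache() hunk further down
in this diff and is illustrative only, not a verbatim copy of the kernel
source:

struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
{
	struct kmem_cache *memcg_cachep;

	/* The single memcg cache now hangs directly off the root cache. */
	memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache);
	if (unlikely(!memcg_cachep)) {
		/*
		 * Not created yet: schedule asynchronous creation and let
		 * this allocation proceed with the root cache.
		 */
		memcg_schedule_kmem_cache_create(cachep);
		return cachep;
	}

	return memcg_cachep;
}

Compare this with the pre-patch version removed below, which had to look up
the per-memcg cache array under RCU and juggle percpu refcounts.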

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-13-guro@fb.com


Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0f876e4d
+1 −4
@@ -317,7 +317,6 @@ struct mem_cgroup {
        /* Index in the kmem_cache->memcg_params.memcg_caches array */
	int kmemcg_id;
	enum memcg_kmem_state kmem_state;
	struct list_head kmem_caches;
	struct obj_cgroup __rcu *objcg;
	struct list_head objcg_list; /* list of inherited objcgs */
#endif
@@ -1404,9 +1403,7 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
}
#endif

struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
					struct obj_cgroup **objcgp);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);

#ifdef CONFIG_MEMCG_KMEM
int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+1 −4
@@ -155,8 +155,7 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name,
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);

void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *);
void memcg_create_kmem_cache(struct kmem_cache *cachep);

/*
 * Please use this macro to create slab caches. Simply specify the
@@ -580,8 +579,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
	return __kmalloc_node(size, flags, node);
}

int memcg_update_all_caches(int num_memcgs);

/**
 * kmalloc_array - allocate memory for an array.
 * @n: number of elements.
+32 −131
@@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
}

/*
 * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
 * This will be used as a shrinker list's index.
 * The main reason for not using cgroup id for this:
 *  this works better in sparse environments, where we have a lot of memcgs,
 *  but only a few kmem-limited. Or also, if we have, for instance, 200
@@ -569,9 +569,6 @@ ino_t page_cgroup_ino(struct page *page)
	unsigned long ino = 0;

	rcu_read_lock();
	if (PageSlab(page) && !PageTail(page)) {
		memcg = memcg_from_slab_page(page);
	} else {
	memcg = page->mem_cgroup;

	/*
@@ -582,7 +579,6 @@ ino_t page_cgroup_ino(struct page *page)
	 */
	if ((unsigned long) memcg & 0x1UL)
		memcg = NULL;
	}

	while (memcg && !(memcg->css.flags & CSS_ONLINE))
		memcg = parent_mem_cgroup(memcg);
@@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
	page = virt_to_head_page(p);

	/*
	 * Slab pages don't have page->mem_cgroup set because corresponding
	 * kmem caches can be reparented during the lifetime. That's why
	 * memcg_from_slab_page() should be used instead.
	 * Slab objects are accounted individually, not per-page.
	 * Memcg membership data for each individual object is saved in
	 * the page->obj_cgroups.
	 */
	if (PageSlab(page))
		return memcg_from_slab_page(page);
	if (page_has_obj_cgroups(page)) {
		struct obj_cgroup *objcg;
		unsigned int off;

		off = obj_to_index(page->slab_cache, page, p);
		objcg = page_obj_cgroups(page)[off];
		return obj_cgroup_memcg(objcg);
	}

	/* All other pages use page->mem_cgroup */
	return page->mem_cgroup;
@@ -2882,8 +2884,6 @@ static int memcg_alloc_cache_id(void)
	else if (size > MEMCG_CACHES_MAX_SIZE)
		size = MEMCG_CACHES_MAX_SIZE;

	err = memcg_update_all_caches(size);
	if (!err)
	err = memcg_update_all_list_lrus(size);
	if (!err)
		memcg_nr_cache_ids = size;
@@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id)
}

struct memcg_kmem_cache_create_work {
	struct mem_cgroup *memcg;
	struct kmem_cache *cachep;
	struct work_struct work;
};
@@ -2912,33 +2911,24 @@ static void memcg_kmem_cache_create_func(struct work_struct *w)
{
	struct memcg_kmem_cache_create_work *cw =
		container_of(w, struct memcg_kmem_cache_create_work, work);
	struct mem_cgroup *memcg = cw->memcg;
	struct kmem_cache *cachep = cw->cachep;

	memcg_create_kmem_cache(memcg, cachep);
	memcg_create_kmem_cache(cachep);

	css_put(&memcg->css);
	kfree(cw);
}

/*
 * Enqueue the creation of a per-memcg kmem_cache.
 */
static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
					       struct kmem_cache *cachep)
static void memcg_schedule_kmem_cache_create(struct kmem_cache *cachep)
{
	struct memcg_kmem_cache_create_work *cw;

	if (!css_tryget_online(&memcg->css))
		return;

	cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
	if (!cw) {
		css_put(&memcg->css);
	if (!cw)
		return;
	}

	cw->memcg = memcg;
	cw->cachep = cachep;
	INIT_WORK(&cw->work, memcg_kmem_cache_create_func);

@@ -2946,102 +2936,26 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
}

/**
 * memcg_kmem_get_cache: select the correct per-memcg cache for allocation
 * memcg_kmem_get_cache: select memcg or root cache for allocation
 * @cachep: the original global kmem cache
 *
 * Return the kmem_cache we're supposed to use for a slab allocation.
 * We try to use the current memcg's version of the cache.
 *
 * If the cache does not exist yet, if we are the first user of it, we
 * create it asynchronously in a workqueue and let the current allocation
 * go through with the original cache.
 *
 * This function takes a reference to the cache it returns to assure it
 * won't get destroyed while we are working with it. Once the caller is
 * done with it, memcg_kmem_put_cache() must be called to release the
 * reference.
 */
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
					struct obj_cgroup **objcgp)
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
{
	struct mem_cgroup *memcg;
	struct kmem_cache *memcg_cachep;
	struct memcg_cache_array *arr;
	int kmemcg_id;

	VM_BUG_ON(!is_root_cache(cachep));

	if (memcg_kmem_bypass())
		return cachep;

	rcu_read_lock();

	if (unlikely(current->active_memcg))
		memcg = current->active_memcg;
	else
		memcg = mem_cgroup_from_task(current);

	if (!memcg || memcg == root_mem_cgroup)
		goto out_unlock;

	kmemcg_id = READ_ONCE(memcg->kmemcg_id);
	if (kmemcg_id < 0)
		goto out_unlock;

	arr = rcu_dereference(cachep->memcg_params.memcg_caches);

	/*
	 * Make sure we will access the up-to-date value. The code updating
	 * memcg_caches issues a write barrier to match the data dependency
	 * barrier inside READ_ONCE() (see memcg_create_kmem_cache()).
	 */
	memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]);

	/*
	 * If we are in a safe context (can wait, and not in interrupt
	 * context), we could be predictable and return right away.
	 * This would guarantee that the allocation being performed
	 * already belongs in the new cache.
	 *
	 * However, there are some clashes that can arrive from locking.
	 * For instance, because we acquire the slab_mutex while doing
	 * memcg_create_kmem_cache, this means no further allocation
	 * could happen with the slab_mutex held. So it's better to
	 * defer everything.
	 *
	 * If the memcg is dying or memcg_cache is about to be released,
	 * don't bother creating new kmem_caches. Because memcg_cachep
	 * is ZEROed as the first step of kmem offlining, we don't need
	 * percpu_ref_tryget_live() here. css_tryget_online() check in
	 * memcg_schedule_kmem_cache_create() will prevent us from
	 * creation of a new kmem_cache.
	 */
	if (unlikely(!memcg_cachep))
		memcg_schedule_kmem_cache_create(memcg, cachep);
	else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) {
		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);

		if (!objcg || !obj_cgroup_tryget(objcg)) {
			percpu_ref_put(&memcg_cachep->memcg_params.refcnt);
			goto out_unlock;
		}

		*objcgp = objcg;
		cachep = memcg_cachep;
	}
out_unlock:
	rcu_read_unlock();
	memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache);
	if (unlikely(!memcg_cachep)) {
		memcg_schedule_kmem_cache_create(cachep);
		return cachep;
	}

/**
 * memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
 * @cachep: the cache returned by memcg_kmem_get_cache
 */
void memcg_kmem_put_cache(struct kmem_cache *cachep)
{
	if (!is_root_cache(cachep))
		percpu_ref_put(&cachep->memcg_params.refcnt);
	return memcg_cachep;
}

/**
@@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
	 */
	memcg->kmemcg_id = memcg_id;
	memcg->kmem_state = KMEM_ONLINE;
	INIT_LIST_HEAD(&memcg->kmem_caches);

	return 0;
}
@@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)

	if (memcg->kmem_state != KMEM_ONLINE)
		return;
	/*
	 * Clear the online state before clearing memcg_caches array
	 * entries. The slab_mutex in memcg_deactivate_kmem_caches()
	 * guarantees that no cache will be created for this cgroup
	 * after we are done (see memcg_create_kmem_cache()).
	 */

	memcg->kmem_state = KMEM_ALLOCATED;

	parent = parent_mem_cgroup(memcg);
	if (!parent)
		parent = root_mem_cgroup;

	/*
	 * Deactivate and reparent kmem_caches and objcgs.
	 */
	memcg_deactivate_kmem_caches(memcg, parent);
	memcg_reparent_objcgs(memcg, parent);

	kmemcg_id = memcg->kmemcg_id;
@@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)

	/* The following stuff does not apply to the root */
	if (!parent) {
#ifdef CONFIG_MEMCG_KMEM
		INIT_LIST_HEAD(&memcg->kmem_caches);
#endif
		root_mem_cgroup = memcg;
		return &memcg->css;
	}
+3 −13
@@ -1249,7 +1249,7 @@ void __init kmem_cache_init(void)
				  nr_node_ids * sizeof(struct kmem_cache_node *),
				  SLAB_HWCACHE_ALIGN, 0, 0);
	list_add(&kmem_cache->list, &slab_caches);
	memcg_link_cache(kmem_cache, NULL);
	memcg_link_cache(kmem_cache);
	slab_state = PARTIAL;

	/*
@@ -2253,17 +2253,6 @@ int __kmem_cache_shrink(struct kmem_cache *cachep)
	return (ret ? 1 : 0);
}

#ifdef CONFIG_MEMCG
void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
{
	__kmem_cache_shrink(cachep);
}

void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
{
}
#endif

int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
	return __kmem_cache_shrink(cachep);
@@ -3872,7 +3861,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
		return ret;

	lockdep_assert_held(&slab_mutex);
	for_each_memcg_cache(c, cachep) {
	c = memcg_cache(cachep);
	if (c) {
		/* return value determined by the root cache only */
		__do_tune_cpucache(c, limit, batchcount, shared, gfp);
	}
+45 −101
@@ -32,66 +32,25 @@ struct kmem_cache {

#else /* !CONFIG_SLOB */

struct memcg_cache_array {
	struct rcu_head rcu;
	struct kmem_cache *entries[0];
};

/*
 * This is the main placeholder for memcg-related information in kmem caches.
 * Both the root cache and the child caches will have it. For the root cache,
 * this will hold a dynamically allocated array large enough to hold
 * information about the currently limited memcgs in the system. To allow the
 * array to be accessed without taking any locks, on relocation we free the old
 * version only after a grace period.
 *
 * Root and child caches hold different metadata.
 * Both the root cache and the child cache will have it. Some fields are used
 * in both cases, other are specific to root caches.
 *
 * @root_cache:	Common to root and child caches.  NULL for root, pointer to
 *		the root cache for children.
 *
 * The following fields are specific to root caches.
 *
 * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
 *		used to index child caches during allocation and cleared
 *		early during shutdown.
 *
 * @root_caches_node: List node for slab_root_caches list.
 *
 * @children:	List of all child caches.  While the child caches are also
 *		reachable through @memcg_caches, a child cache remains on
 *		this list until it is actually destroyed.
 *
 * The following fields are specific to child caches.
 *
 * @memcg:	Pointer to the memcg this cache belongs to.
 *
 * @children_node: List node for @root_cache->children list.
 *
 * @kmem_caches_node: List node for @memcg->kmem_caches list.
 * @memcg_cache: pointer to memcg kmem cache, used by all non-root memory
 *		cgroups.
 * @root_caches_node: list node for slab_root_caches list.
 */
struct memcg_cache_params {
	struct kmem_cache *root_cache;
	union {
		struct {
			struct memcg_cache_array __rcu *memcg_caches;

	struct kmem_cache *memcg_cache;
	struct list_head __root_caches_node;
			struct list_head children;
			bool dying;
		};
		struct {
			struct mem_cgroup *memcg;
			struct list_head children_node;
			struct list_head kmem_caches_node;
			struct percpu_ref refcnt;

			void (*work_fn)(struct kmem_cache *);
			union {
				struct rcu_head rcu_head;
				struct work_struct work;
			};
		};
	};
};
#endif /* CONFIG_SLOB */

@@ -236,8 +195,6 @@ bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void __kmemcg_cache_deactivate(struct kmem_cache *s);
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
void slab_kmem_cache_release(struct kmem_cache *);
void kmem_cache_shrink_all(struct kmem_cache *s);

@@ -311,14 +268,6 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
extern struct list_head		slab_root_caches;
#define root_caches_node	memcg_params.__root_caches_node

/*
 * Iterate over all memcg caches of the given root cache. The caller must hold
 * slab_mutex.
 */
#define for_each_memcg_cache(iter, root) \
	list_for_each_entry(iter, &(root)->memcg_params.children, \
			    memcg_params.children_node)

static inline bool is_root_cache(struct kmem_cache *s)
{
	return !s->memcg_params.root_cache;
@@ -349,6 +298,13 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
	return s->memcg_params.root_cache;
}

static inline struct kmem_cache *memcg_cache(struct kmem_cache *s)
{
	if (is_root_cache(s))
		return s->memcg_params.memcg_cache;
	return NULL;
}

static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
{
	/*
@@ -361,25 +317,9 @@ static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
		((unsigned long)page->obj_cgroups & ~0x1UL);
}

/*
 * Expects a pointer to a slab page. Please note, that PageSlab() check
 * isn't sufficient, as it returns true also for tail compound slab pages,
 * which do not have slab_cache pointer set.
 * So this function assumes that the page can pass PageSlab() && !PageTail()
 * check.
 *
 * The kmem_cache can be reparented asynchronously. The caller must ensure
 * the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex.
 */
static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
static inline bool page_has_obj_cgroups(struct page *page)
{
	struct kmem_cache *s;

	s = READ_ONCE(page->slab_cache);
	if (s && !is_root_cache(s))
		return READ_ONCE(s->memcg_params.memcg);

	return NULL;
	return ((unsigned long)page->obj_cgroups & 0x1UL);
}

static inline int memcg_alloc_page_obj_cgroups(struct page *page,
@@ -418,17 +358,25 @@ static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
						size_t objects, gfp_t flags)
{
	struct kmem_cache *cachep;
	struct obj_cgroup *objcg;

	cachep = memcg_kmem_get_cache(s, objcgp);
	if (memcg_kmem_bypass())
		return s;

	cachep = memcg_kmem_get_cache(s);
	if (is_root_cache(cachep))
		return s;

	if (obj_cgroup_charge(*objcgp, flags, objects * obj_full_size(s))) {
		obj_cgroup_put(*objcgp);
		memcg_kmem_put_cache(cachep);
	objcg = get_obj_cgroup_from_current();
	if (!objcg)
		return s;

	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
		obj_cgroup_put(objcg);
		cachep = NULL;
	}

	*objcgp = objcg;
	return cachep;
}

@@ -467,7 +415,6 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
		}
	}
	obj_cgroup_put(objcg);
	memcg_kmem_put_cache(s);
}

static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
@@ -491,7 +438,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
}

extern void slab_init_memcg_params(struct kmem_cache *);
extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
extern void memcg_link_cache(struct kmem_cache *s);

#else /* CONFIG_MEMCG_KMEM */

@@ -499,9 +446,6 @@ extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
#define slab_root_caches	slab_caches
#define root_caches_node	list

#define for_each_memcg_cache(iter, root) \
	for ((void)(iter), (void)(root); 0; )

static inline bool is_root_cache(struct kmem_cache *s)
{
	return true;
@@ -523,7 +467,17 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
	return s;
}

static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
static inline struct kmem_cache *memcg_cache(struct kmem_cache *s)
{
	return NULL;
}

static inline bool page_has_obj_cgroups(struct page *page)
{
	return false;
}

static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
	return NULL;
}
@@ -560,8 +514,7 @@ static inline void slab_init_memcg_params(struct kmem_cache *s)
{
}

static inline void memcg_link_cache(struct kmem_cache *s,
				    struct mem_cgroup *memcg)
static inline void memcg_link_cache(struct kmem_cache *s)
{
}

@@ -582,17 +535,14 @@ static __always_inline int charge_slab_page(struct page *page,
					    gfp_t gfp, int order,
					    struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG_KMEM
	if (memcg_kmem_enabled() && !is_root_cache(s)) {
		int ret;

		ret = memcg_alloc_page_obj_cgroups(page, s, gfp);
		if (ret)
			return ret;

		percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
	}
#endif

	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
			    PAGE_SIZE << order);
	return 0;
@@ -601,12 +551,9 @@ static __always_inline int charge_slab_page(struct page *page,
static __always_inline void uncharge_slab_page(struct page *page, int order,
					       struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG_KMEM
	if (memcg_kmem_enabled() && !is_root_cache(s)) {
	if (memcg_kmem_enabled() && !is_root_cache(s))
		memcg_free_page_obj_cgroups(page);
		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
	}
#endif

	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
			    -(PAGE_SIZE << order));
}
@@ -749,9 +696,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
void *slab_start(struct seq_file *m, loff_t *pos);
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
void *memcg_slab_start(struct seq_file *m, loff_t *pos);
void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos);
void memcg_slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)