Commit 991e7673 authored by Shakeel Butt's avatar Shakeel Butt Committed by Linus Torvalds
Browse files

mm: memcontrol: account kernel stack per node



Currently the kernel stack is being accounted per-zone.  There is no need
to do that.  In addition due to being per-zone, memcg has to keep a
separate MEMCG_KERNEL_STACK_KB.  Make the stat per-node and deprecate
MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
node_stat_item.  In addition localize the kernel stack stats updates to
account_kernel_stack().

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Link: http://lkml.kernel.org/r/20200630161539.1759185-1-shakeelb@google.com


Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent fbc1ac9d
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev,
		       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
		       nid, K(i.sharedram),
		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
		       nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
		       nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
		       nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
#endif
		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
		       nid, 0UL,
+2 −2
Original line number Diff line number Diff line
@@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
	show_val_kb(m, "SReclaimable:   ", sreclaimable);
	show_val_kb(m, "SUnreclaim:     ", sunreclaim);
	seq_printf(m, "KernelStack:    %8lu kB\n",
		   global_zone_page_state(NR_KERNEL_STACK_KB));
		   global_node_page_state(NR_KERNEL_STACK_KB));
#ifdef CONFIG_SHADOW_CALL_STACK
	seq_printf(m, "ShadowCallStack:%8lu kB\n",
		   global_zone_page_state(NR_KERNEL_SCS_KB));
		   global_node_page_state(NR_KERNEL_SCS_KB));
#endif
	show_val_kb(m, "PageTables:     ",
		    global_zone_page_state(NR_PAGETABLE));
+19 −2
Original line number Diff line number Diff line
@@ -32,8 +32,6 @@ struct kmem_cache;
enum memcg_stat_item {
	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
	MEMCG_SOCK,
	/* XXX: why are these zone and not node counters? */
	MEMCG_KERNEL_STACK_KB,
	MEMCG_NR_STAT,
};

@@ -729,8 +727,19 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
			int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);

void mod_memcg_obj_state(void *p, int idx, int val);

static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
					 int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_lruvec_slab_state(p, idx, val);
	local_irq_restore(flags);
}

static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
					  enum node_stat_item idx, int val)
{
@@ -1151,6 +1160,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
	__mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
					 int val)
{
	struct page *page = virt_to_head_page(p);

	mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void mod_memcg_obj_state(void *p, int idx, int val)
{
}
+4 −4
Original line number Diff line number Diff line
@@ -155,10 +155,6 @@ enum zone_stat_item {
	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
	NR_PAGETABLE,		/* used for pagetables */
	NR_KERNEL_STACK_KB,	/* measured in KiB */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	NR_KERNEL_SCS_KB,	/* measured in KiB */
#endif
	/* Second 128 byte cacheline */
	NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -203,6 +199,10 @@ enum node_stat_item {
	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
	NR_KERNEL_STACK_KB,	/* measured in KiB */
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	NR_KERNEL_SCS_KB,	/* measured in KiB */
#endif
	NR_VM_NODE_STAT_ITEMS
};

+14 −37
Original line number Diff line number Diff line
@@ -276,13 +276,8 @@ static inline void free_thread_stack(struct task_struct *tsk)
	if (vm) {
		int i;

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
			mod_memcg_page_state(vm->pages[i],
					     MEMCG_KERNEL_STACK_KB,
					     -(int)(PAGE_SIZE / 1024));

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
			memcg_kmem_uncharge_page(vm->pages[i], 0);
		}

		for (i = 0; i < NR_CACHED_STACKS; i++) {
			if (this_cpu_cmpxchg(cached_stacks[i],
@@ -382,31 +377,14 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
	void *stack = task_stack_page(tsk);
	struct vm_struct *vm = task_stack_vm_area(tsk);

	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);

	if (vm) {
		int i;

		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
			mod_zone_page_state(page_zone(vm->pages[i]),
					    NR_KERNEL_STACK_KB,
					    PAGE_SIZE / 1024 * account);
		}
	} else {
		/*
		 * All stack pages are in the same zone and belong to the
		 * same memcg.
		 */
		struct page *first_page = virt_to_page(stack);

		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
				    THREAD_SIZE / 1024 * account);

		mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
	/* All stack pages are in the same node. */
	if (vm)
		mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
				      account * (THREAD_SIZE / 1024));
	else
		mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
				      account * (THREAD_SIZE / 1024));
	}
}

static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -415,24 +393,23 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
	struct vm_struct *vm = task_stack_vm_area(tsk);
	int ret;

	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);

	if (vm) {
		int i;

		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
			/*
			 * If memcg_kmem_charge_page() fails, page->mem_cgroup
			 * pointer is NULL, and both memcg_kmem_uncharge_page()
			 * and mod_memcg_page_state() in free_thread_stack()
			 * will ignore this page. So it's safe.
			 * pointer is NULL, and memcg_kmem_uncharge_page() in
			 * free_thread_stack() will ignore this page.
			 */
			ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
						     0);
			if (ret)
				return ret;

			mod_memcg_page_state(vm->pages[i],
					     MEMCG_KERNEL_STACK_KB,
					     PAGE_SIZE / 1024);
		}
	}
#endif
Loading