Commit a3b2d692 authored by KAMEZAWA Hiroyuki, committed by Linus Torvalds
Browse files

cgroups: use css id in swap cgroup for saving memory v5



Try to use CSS ID for records in swap_cgroup.  By this, on a 64-bit machine,
the size of a swap_cgroup entry goes down from 8 bytes to 2 bytes.

This means, when 2GB of swap is equipped (assuming the page size is 4096 bytes):

	From size of swap_cgroup = 2G/4k * 8 = 4Mbytes.
	To   size of swap_cgroup = 2G/4k * 2 = 1Mbytes.

Reduction is large.  Of course, there are trade-offs.  This CSS ID will
add overhead to swap-in/swap-out/swap-free.

But in general,
  - swap is a resource which users tend to avoid using.
  - If swap is never used, swap_cgroup area is not used.
  - Reading traditional manuals, size of swap should be proportional to
    size of memory. Memory size of machine is increasing now.

I think reducing size of swap_cgroup makes sense.

Note:
  - ID->CSS lookup routine has no locks, it's under RCU-Read-Side.
  - memcg can become obsolete at rmdir(), but it is not freed while a refcnt
    from swap_cgroup is still held.

Changelog v4->v5:
 - reworked on to memcg-charge-swapcache-to-proper-memcg.patch
Changelog ->v4:
 - fixed not configured case.
 - deleted unnecessary comments.
 - fixed NULL pointer bug.
 - fixed message in dmesg.

[nishimura@mxp.nes.nec.co.jp: css_tryget can be called twice in !PageCgroupUsed case]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3c776e64
Loading
Loading
Loading
Loading
+6 −7
Original line number Diff line number Diff line
@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void)

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
#include <linux/swap.h>
extern struct mem_cgroup *
swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type);
#else
#include <linux/swap.h>

static inline
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	return NULL;
	return 0;
}

static inline
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	return NULL;
	return 0;
}

static inline int
+62 −12
Original line number Diff line number Diff line
@@ -991,10 +991,31 @@ nomem:
	return -ENOMEM;
}


/*
 * Translate a CSS ID into the mem_cgroup it belongs to.
 *
 * Returns NULL when @id is 0 (the unused ID) or when css_lookup() finds no
 * css for it. Must be called under rcu_read_lock(). A caller that cares
 * whether the memcg has already been removed has to check that itself,
 * e.g. with css_is_removed(); dropping a refcnt held by swap may be done
 * against a removed memcg.
 */
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
	struct cgroup_subsys_state *css = NULL;

	/* Only a non-zero ID can be registered with the subsystem. */
	if (id)
		css = css_lookup(&mem_cgroup_subsys, id);

	return css ? container_of(css, struct mem_cgroup, css) : NULL;
}

static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	unsigned short id;
	swp_entry_t ent;

	VM_BUG_ON(!PageLocked(page));
@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
	/*
	 * Used bit of swapcache is solid under page lock.
	 */
	if (PageCgroupUsed(pc))
	if (PageCgroupUsed(pc)) {
		mem = pc->mem_cgroup;
	else {
		if (mem && !css_tryget(&mem->css))
			mem = NULL;
	} else {
		ent.val = page_private(page);
		mem = lookup_swap_cgroup(ent);
		id = lookup_swap_cgroup(ent);
		rcu_read_lock();
		mem = mem_cgroup_lookup(id);
		if (mem && !css_tryget(&mem->css))
			mem = NULL;
		rcu_read_unlock();
	}
	if (!mem)
		return NULL;
	if (!css_tryget(&mem->css))
		return NULL;
	return mem;
}

@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,

	if (do_swap_account && !ret && PageSwapCache(page)) {
		swp_entry_t ent = {.val = page_private(page)};
		unsigned short id;
		/* avoid double counting */
		mem = swap_cgroup_record(ent, NULL);
		id = swap_cgroup_record(ent, 0);
		rcu_read_lock();
		mem = mem_cgroup_lookup(id);
		if (mem) {
			/*
			 * We did swap-in. Then, this entry is doubly counted
			 * both in mem and memsw. We uncharge it, here.
			 * Recorded ID can be obsolete. We avoid calling
			 * css_tryget()
			 */
			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
			mem_cgroup_put(mem);
		}
		rcu_read_unlock();
	}
	return ret;
}
@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
	 */
	if (do_swap_account && PageSwapCache(page)) {
		swp_entry_t ent = {.val = page_private(page)};
		unsigned short id;
		struct mem_cgroup *memcg;
		memcg = swap_cgroup_record(ent, NULL);

		id = swap_cgroup_record(ent, 0);
		rcu_read_lock();
		memcg = mem_cgroup_lookup(id);
		if (memcg) {
			/*
			 * This recorded memcg can be obsolete one. So, avoid
			 * calling css_tryget
			 */
			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
			mem_cgroup_put(memcg);
		}

		rcu_read_unlock();
	}
	/* add this page(page_cgroup) to the LRU we want. */

@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
	/* record memcg information */
	if (do_swap_account && memcg) {
		swap_cgroup_record(ent, memcg);
		swap_cgroup_record(ent, css_id(&memcg->css));
		mem_cgroup_get(memcg);
	}
	if (memcg)
@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
	struct mem_cgroup *memcg;
	unsigned short id;

	if (!do_swap_account)
		return;

	memcg = swap_cgroup_record(ent, NULL);
	id = swap_cgroup_record(ent, 0);
	rcu_read_lock();
	memcg = mem_cgroup_lookup(id);
	if (memcg) {
		/*
		 * We uncharge this because swap is freed.
		 * This memcg can be obsolete one. We avoid calling css_tryget
		 */
		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
		mem_cgroup_put(memcg);
	}
	rcu_read_unlock();
}
#endif

+14 −18
Original line number Diff line number Diff line
@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {

struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

/*
 * This 8bytes seems big..maybe we can reduce this when we can use "id" for
 * cgroup rather than pointer.
 */
struct swap_cgroup {
	struct mem_cgroup	*val;
	unsigned short		id;
};
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK	(SC_PER_PAGE - 1)
@@ -342,10 +338,10 @@ not_enough_page:
 * @ent: swap entry to be recorded into
 * @id: CSS ID of the mem_cgroup to be recorded
 *
 * Returns old value at success, NULL at failure.
 * (Of course, old value can be NULL.)
 * Returns old value at success, 0 at failure.
 * (Of course, old value can be 0.)
 */
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	struct mem_cgroup *old;
	unsigned short old;

	if (!do_swap_account)
		return NULL;
		return 0;

	ctrl = &swap_cgroup_ctrl[type];

	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	old = sc->val;
	sc->val = mem;
	old = sc->id;
	sc->id = id;

	return old;
}
@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns pointer to mem_cgroup at success. NULL at failure.
 * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
 */
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	struct mem_cgroup *ret;
	unsigned short ret;

	if (!do_swap_account)
		return NULL;
		return 0;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	ret = sc->val;
	ret = sc->id;
	return ret;
}

@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)

	printk(KERN_INFO
		"swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
		" and %ld bytes to hold mem_cgroup pointers on swap\n",
		" and %ld bytes to hold mem_cgroup information per swap ents\n",
		array_size, length * PAGE_SIZE);
	printk(KERN_INFO
	"swap_cgroup can be disabled by noswapaccount boot option.\n");