Commit 9d82c694 authored by Johannes Weiner, committed by Linus Torvalds

mm: memcontrol: convert anon and file-thp to new mem_cgroup_charge() API

With the page->mapping requirement gone from memcg, we can charge anon
and file-thp pages in a single step, right after they're allocated.

This removes two of the three API calls - in particular the tricky commit
step that had to happen at just the right time between when the page is
"set up" and when it is "published" - somewhat vague and fluid concepts
that varied by page type.  All we need now is a freshly allocated page
and a memcg context to charge it to.
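
Concretely, the three-call sequence of mem_cgroup_try_charge(),
mem_cgroup_commit_charge() and mem_cgroup_cancel_charge() collapses into
a single mem_cgroup_charge() call.  A minimal before/after sketch, using
the call signatures from the hunks below; the surrounding setup and
error handling is illustrative only:

	/* Before: reserve the charge at setup time... */
	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
		return -ENOMEM;
	/* ... set up the page ... */
	if (error) {
		/* back out the reservation on failure */
		mem_cgroup_cancel_charge(page, memcg);
		return error;
	}
	/* ...and commit it when the page is published */
	mem_cgroup_commit_charge(page, memcg, false);

	/* After: charge the freshly allocated page in one step */
	if (mem_cgroup_charge(page, mm, gfp, false))
		return -ENOMEM;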

v2: prevent double charges on pre-allocated hugepages in khugepaged

[hannes@cmpxchg.org: Fix crash - *hpage could be ERR_PTR instead of NULL]
  Link: http://lkml.kernel.org/r/20200512215813.GA487759@cmpxchg.org
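
Both khugepaged notes above come down to the same error path: khugepaged
may keep its pre-allocated hugepage across collapse attempts, so a failed
attempt has to drop the charge taken above before retrying, and *hpage
can hold an ERR_PTR rather than NULL when allocation failed.  Hence the
check added in the khugepaged hunks below:

	if (!IS_ERR_OR_NULL(*hpage))
		mem_cgroup_uncharge(*hpage);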


Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Qian Cai <cai@lca.pw>
Link: http://lkml.kernel.org/r/20200508183105.225460-13-hannes@cmpxchg.org


Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 468c3982
include/linux/mm.h (+1 −3)
@@ -501,7 +501,6 @@ struct vm_fault {
 	pte_t orig_pte;			/* Value of PTE at the time of fault */
 
 	struct page *cow_page;		/* Page handler may use for COW fault */
-	struct mem_cgroup *memcg;	/* Cgroup cow_page belongs to */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
@@ -946,8 +945,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
-		struct page *page);
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
 vm_fault_t finish_fault(struct vm_fault *vmf);
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
kernel/events/uprobes.c (+3 −8)
@@ -162,14 +162,13 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	};
 	int err;
 	struct mmu_notifier_range range;
-	struct mem_cgroup *memcg;
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
 	if (new_page) {
-		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
-					    &memcg);
+		err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL,
+					false);
 		if (err)
 			return err;
 	}
@@ -179,16 +178,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	mmu_notifier_invalidate_range_start(&range);
 	err = -EAGAIN;
-	if (!page_vma_mapped_walk(&pvmw)) {
-		if (new_page)
-			mem_cgroup_cancel_charge(new_page, memcg);
+	if (!page_vma_mapped_walk(&pvmw))
 		goto unlock;
-	}
 	VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
 	if (new_page) {
 		get_page(new_page);
-		mem_cgroup_commit_charge(new_page, memcg, false);
 		page_add_new_anon_rmap(new_page, vma, addr, false);
 		lru_cache_add_active_or_unevictable(new_page, vma);
 	} else
mm/filemap.c (+1 −1)
@@ -2633,7 +2633,7 @@ void filemap_map_pages(struct vm_fault *vmf,
 		if (vmf->pte)
 			vmf->pte += xas.xa_index - last_pgoff;
 		last_pgoff = xas.xa_index;
-		if (alloc_set_pte(vmf, NULL, page))
+		if (alloc_set_pte(vmf, page))
 			goto unlock;
 		unlock_page(page);
 		goto next;
mm/huge_memory.c (+3 −6)
@@ -587,19 +587,19 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 			struct page *page, gfp_t gfp)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	vm_fault_t ret = 0;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg)) {
+	if (mem_cgroup_charge(page, vma->vm_mm, gfp, false)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		count_vm_event(THP_FAULT_FALLBACK_CHARGE);
 		return VM_FAULT_FALLBACK;
 	}
+	cgroup_throttle_swaprate(page, gfp);
 
 	pgtable = pte_alloc_one(vma->vm_mm);
 	if (unlikely(!pgtable)) {
@@ -630,7 +630,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 			vm_fault_t ret2;
 
 			spin_unlock(vmf->ptl);
-			mem_cgroup_cancel_charge(page, memcg);
 			put_page(page);
 			pte_free(vma->vm_mm, pgtable);
 			ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
@@ -640,7 +639,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		mem_cgroup_commit_charge(page, memcg, false);
 		page_add_new_anon_rmap(page, vma, haddr, true);
 		lru_cache_add_active_or_unevictable(page, vma);
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
@@ -649,7 +647,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
-		count_memcg_events(memcg, THP_FAULT_ALLOC, 1);
+		count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
 	}
 
 	return 0;
@@ -658,7 +656,6 @@ unlock_release:
 release:
 	if (pgtable)
 		pte_free(vma->vm_mm, pgtable);
-	mem_cgroup_cancel_charge(page, memcg);
 	put_page(page);
 	return ret;
 
mm/khugepaged.c (+10 −25)
@@ -1037,7 +1037,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct page *new_page;
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated = 0, result = 0;
-	struct mem_cgroup *memcg;
 	struct vm_area_struct *vma;
 	struct mmu_notifier_range range;
 	gfp_t gfp;
@@ -1060,15 +1059,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) {
+	if (unlikely(mem_cgroup_charge(new_page, mm, gfp, false))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
+	count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
 
 	down_read(&mm->mmap_sem);
 	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result) {
-		mem_cgroup_cancel_charge(new_page, memcg);
 		up_read(&mm->mmap_sem);
 		goto out_nolock;
 	}
@@ -1076,7 +1075,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pmd = mm_find_pmd(mm, address);
 	if (!pmd) {
 		result = SCAN_PMD_NULL;
-		mem_cgroup_cancel_charge(new_page, memcg);
 		up_read(&mm->mmap_sem);
 		goto out_nolock;
 	}
@@ -1088,7 +1086,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
 						     pmd, referenced)) {
-		mem_cgroup_cancel_charge(new_page, memcg);
 		up_read(&mm->mmap_sem);
 		goto out_nolock;
 	}
@@ -1175,9 +1172,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
-	mem_cgroup_commit_charge(new_page, memcg, false);
 	page_add_new_anon_rmap(new_page, vma, address, true);
-	count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
@@ -1191,10 +1186,11 @@ static void collapse_huge_page(struct mm_struct *mm,
 out_up_write:
 	up_write(&mm->mmap_sem);
 out_nolock:
+	if (!IS_ERR_OR_NULL(*hpage))
+		mem_cgroup_uncharge(*hpage);
 	trace_mm_collapse_huge_page(mm, isolated, result);
 	return;
 out:
-	mem_cgroup_cancel_charge(new_page, memcg);
 	goto out_up_write;
 }
 
@@ -1618,7 +1614,6 @@ static void collapse_file(struct mm_struct *mm,
 	struct address_space *mapping = file->f_mapping;
 	gfp_t gfp;
 	struct page *new_page;
-	struct mem_cgroup *memcg;
 	pgoff_t index, end = start + HPAGE_PMD_NR;
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
@@ -1637,10 +1632,11 @@ static void collapse_file(struct mm_struct *mm,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) {
+	if (unlikely(mem_cgroup_charge(new_page, mm, gfp, false))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
+	count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
 
 	/* This will be less messy when we use multi-index entries */
 	do {
@@ -1650,7 +1646,6 @@ static void collapse_file(struct mm_struct *mm,
 			break;
 		xas_unlock_irq(&xas);
 		if (!xas_nomem(&xas, GFP_KERNEL)) {
-			mem_cgroup_cancel_charge(new_page, memcg);
 			result = SCAN_FAIL;
 			goto out;
 		}
@@ -1844,18 +1839,9 @@ out_unlock:
 	}
 
 	if (nr_none) {
-		struct lruvec *lruvec;
-		/*
-		 * XXX: We have started try_charge and pinned the
-		 * memcg, but the page isn't committed yet so we
-		 * cannot use mod_lruvec_page_state(). This hackery
-		 * will be cleaned up when remove the page->mapping
-		 * dependency from memcg and fully charge above.
-		 */
-		lruvec = mem_cgroup_lruvec(memcg, page_pgdat(new_page));
-		__mod_lruvec_state(lruvec, NR_FILE_PAGES, nr_none);
+		__mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
 		if (is_shmem)
-			__mod_lruvec_state(lruvec, NR_SHMEM, nr_none);
+			__mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
 	}
 
 xa_locked:
@@ -1893,7 +1879,6 @@ xa_unlocked:
 
 		SetPageUptodate(new_page);
 		page_ref_add(new_page, HPAGE_PMD_NR - 1);
-		mem_cgroup_commit_charge(new_page, memcg, false);
 
 		if (is_shmem) {
 			set_page_dirty(new_page);
@@ -1901,7 +1886,6 @@ xa_unlocked:
 		} else {
 			lru_cache_add_file(new_page);
 		}
-		count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
 
 		/*
 		 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1948,13 +1932,14 @@ xa_unlocked:
 		VM_BUG_ON(nr_none);
 		xas_unlock_irq(&xas);
 
-		mem_cgroup_cancel_charge(new_page, memcg);
 		new_page->mapping = NULL;
 	}
 
 	unlock_page(new_page);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
+	if (!IS_ERR_OR_NULL(*hpage))
+		mem_cgroup_uncharge(*hpage);
 	/* TODO: tracepoints */
 }
 