Commit 79eb597c authored by Daniel Jordan, committed by Linus Torvalds
Browse files

mm: add account_locked_vm utility function

locked_vm accounting is done roughly the same way in five places, so
unify them in a helper.

Include the helper's caller in the debug print to distinguish between
callsites.

Error codes stay the same, so user-visible behavior does too.  The one
exception is that the -EPERM case in tce_account_locked_vm is removed
because Alexey has never seen it triggered.

[daniel.m.jordan@oracle.com: v3]
  Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com
[sfr@canb.auug.org.au: fix mm/util.c]
  Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com


Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Cc: Alan Tull <atull@kernel.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Moritz Fischer <mdf@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Steve Sistare <steven.sistare@oracle.com>
Cc: Wu Hao <hao.wu@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 73b20c84
Loading
Loading
Loading
Loading
+4 −40
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>
#include <linux/mm.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}

static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
	long ret = 0;

	if (!current || !current->mm)
		return ret; /* process exited */

	down_write(&current->mm->mmap_sem);

	if (inc) {
		unsigned long locked, lock_limit;

		locked = current->mm->locked_vm + stt_pages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += stt_pages;
	} else {
		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
			stt_pages = current->mm->locked_vm;

		current->mm->locked_vm -= stt_pages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
			inc ? '+' : '-',
			stt_pages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
@@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)

	kvm_put_kvm(stt->kvm);

	kvmppc_account_memlimit(
	account_locked_vm(current->mm,
		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
	call_rcu(&stt->rcu, release_spapr_tce_table);

@@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
		return -EINVAL;

	npages = kvmppc_tce_pages(size);
	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
	ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
	if (ret)
		return ret;

@@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,

	kfree(stt);
 fail_acct:
	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
	account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
	return ret;
}

+4 −37
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>
@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t {
	u64 dev_hpa;		/* Device memory base address */
};

static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);

	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

@@ -211,7 +178,7 @@ free_exit:
	kfree(mem);

unlock_exit:
	mm_iommu_adjust_locked_vm(mm, locked_entries, false);
	account_locked_vm(mm, locked_entries, false);

	return ret;
}
@@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
unlock_exit:
	mutex_unlock(&mem_list_mutex);

	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
+4 −49
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/mm.h>

#include "dfl-afu.h"

@@ -31,52 +32,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata)
	afu->dma_regions = RB_ROOT;
}

/**
 * afu_dma_adjust_locked_vm - adjust locked memory
 * @dev: port device
 * @npages: number of pages
 * @incr: increase or decrease locked memory
 *
 * Increase or decrease the locked memory size with npages input.
 *
 * Return 0 on success.
 * Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK.
 */
static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
{
	unsigned long locked, lock_limit;
	int ret = 0;

	/* the task is exiting. */
	if (!current->mm)
		return 0;

	down_write(&current->mm->mmap_sem);

	if (incr) {
		locked = current->mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > current->mm->locked_vm))
			npages = current->mm->locked_vm;
		current->mm->locked_vm -= npages;
	}

	dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
		incr ? '+' : '-', npages << PAGE_SHIFT,
		current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK),
		ret ? "- exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

/**
 * afu_dma_pin_pages - pin pages of given dma memory region
 * @pdata: feature device platform data
@@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
	struct device *dev = &pdata->dev->dev;
	int ret, pinned;

	ret = afu_dma_adjust_locked_vm(dev, npages, true);
	ret = account_locked_vm(current->mm, npages, true);
	if (ret)
		return ret;

@@ -121,7 +76,7 @@ put_pages:
free_pages:
	kfree(region->pages);
unlock_vm:
	afu_dma_adjust_locked_vm(dev, npages, false);
	account_locked_vm(current->mm, npages, false);
	return ret;
}

@@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata,

	put_all_pages(region->pages, npages);
	kfree(region->pages);
	afu_dma_adjust_locked_vm(dev, npages, false);
	account_locked_vm(current->mm, npages, false);

	dev_dbg(dev, "%ld pages unpinned\n", npages);
}
+5 −49
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>

#include <asm/iommu.h>
#include <asm/tce.h>
@@ -31,51 +32,6 @@
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
@@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	ret = account_locked_vm(container->mm, locked, true);
	if (ret)
		return ret;

@@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container)
	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
	account_locked_vm(container->mm, container->locked_pages, false);
}

static void *tce_iommu_open(unsigned long arg)
@@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container,
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
	if (ret)
		return ret;

@@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container,
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
	account_locked_vm(container->mm, pages, false);
}

static long tce_iommu_create_window(struct tce_container *container,
+2 −15
Original line number Diff line number Diff line
@@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)

	ret = down_write_killable(&mm->mmap_sem);
	if (!ret) {
		if (npage > 0) {
			if (!dma->lock_cap) {
				unsigned long limit;

				limit = task_rlimit(dma->task,
						RLIMIT_MEMLOCK) >> PAGE_SHIFT;

				if (mm->locked_vm + npage > limit)
					ret = -ENOMEM;
			}
		}

		if (!ret)
			mm->locked_vm += npage;

		ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
					  dma->lock_cap);
		up_write(&mm->mmap_sem);
	}

Loading