Commit 5d240da9 authored by Eric Huang, committed by Alex Deucher
Browse files

drm/amdkfd: change system memory overcommit limit



This improves the system memory limit by:
1. replacing the userptr limit with a total memory limit that
counts TTM memory usage and userptr usage.
2. counting acc size for all BOs.

Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 0f04e538
Loading
Loading
Loading
Loading
+58 −41
Original line number Diff line number Diff line
@@ -46,9 +46,9 @@
/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_userptr_mem_limit;
	uint64_t max_ttm_mem_limit;
	int64_t system_mem_used;
	int64_t userptr_mem_used;
	int64_t ttm_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}

/* Set memory usage limits. Currently, limits are
 *  System (kernel) memory - 3/8th System RAM
 *  Userptr memory - 3/4th System RAM
 *  System (TTM + userptr) memory - 3/4th System RAM
 *  TTM memory - 3/8th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_userptr_mem_limit >> 20));
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}

static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
					      uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;
	size_t acc_size, system_mem_needed, ttm_mem_needed;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
			kfd_mem_limit.max_system_mem_limit) {
			ret = -ENOMEM;
			goto err_no_mem;
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
	}
		kfd_mem_limit.system_mem_used += (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		if ((kfd_mem_limit.system_mem_used + acc_size >

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
		kfd_mem_limit.max_system_mem_limit) ||
			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
			kfd_mem_limit.max_userptr_mem_limit)) {
		(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
		kfd_mem_limit.max_ttm_mem_limit))
		ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += acc_size;
		kfd_mem_limit.userptr_mem_used += size;
	else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
	}
err_no_mem:

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

static void unreserve_system_mem_limit(struct amdgpu_device *adev,
				       uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.userptr_mem_used -= size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
	} else {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -1219,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
						     alloc_domain, false);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_system_mem;
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1270,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_system_mem;
	goto err_reserve_limit;
err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
	unreserve_system_mem_limit(adev, size, alloc_domain, false);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	return ret;