Commit a269e449, authored by Alex Sierra, committed by Alex Deucher
Browse files

drm/amdgpu: Avoid reclaim fs while eviction lock



[Why]
Avoid filesystem reclaim while the eviction lock is held, because the
eviction lock can also be taken from an MMU notifier.

[How]
Set the PF_MEMALLOC_NOFS flag while the eviction mutex is locked, using
the memalloc_nofs_save() / memalloc_nofs_restore() API.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a9ffe2a9
Loading
Loading
Loading
Loading
+33 −7
Original line number Diff line number Diff line
@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
	struct dma_fence_cb cb;
};

/**
 * amdgpu_vm_eviction_lock - take the VM eviction lock and enter a NOFS scope
 * @vm: VM whose eviction lock is taken
 *
 * The vm eviction_lock can be taken in MMU notifiers. Make sure no
 * filesystem reclaim happens anywhere while this lock is held, to prevent
 * deadlocks when an MMU notifier runs in reclaim-FS context.
 *
 * Acquires @vm->eviction_lock, then stores the value returned by
 * memalloc_nofs_save() in @vm->saved_flags.
 */
static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
{
	mutex_lock(&vm->eviction_lock);
	/* Written only with the mutex held; the unlock helper reads it back. */
	vm->saved_flags = memalloc_nofs_save();
}

/*
 * Try to take the VM eviction lock without sleeping.
 *
 * Returns 1 when the mutex was acquired; in that case the value returned by
 * memalloc_nofs_save() has been stored in vm->saved_flags. Returns 0 when
 * the mutex could not be acquired; vm->saved_flags is then left untouched.
 */
static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
{
	/* Lock not obtained: nothing to record, report failure. */
	if (!mutex_trylock(&vm->eviction_lock))
		return 0;

	vm->saved_flags = memalloc_nofs_save();
	return 1;
}

/*
 * Release the VM eviction lock taken by amdgpu_vm_eviction_lock() or by a
 * successful amdgpu_vm_eviction_trylock().
 *
 * The flags kept in vm->saved_flags are handed back to
 * memalloc_nofs_restore() first, and only then is the mutex dropped.
 */
static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
{
	/*
	 * Must happen before the unlock: the next holder of the mutex
	 * overwrites vm->saved_flags as soon as it has the lock.
	 */
	memalloc_nofs_restore(vm->saved_flags);
	mutex_unlock(&vm->eviction_lock);
}

/**
 * amdgpu_vm_level_shift - return the addr shift for each level
 *
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		}
	}

	mutex_lock(&vm->eviction_lock);
	amdgpu_vm_eviction_lock(vm);
	vm->evicting = false;
	mutex_unlock(&vm->eviction_lock);
	amdgpu_vm_eviction_unlock(vm);

	return 0;
}
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_KFD;

	mutex_lock(&vm->eviction_lock);
	amdgpu_vm_eviction_lock(vm);
	if (vm->evicting) {
		r = -EBUSY;
		goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
	r = vm->update_funcs->commit(&params, fence);

error_unlock:
	mutex_unlock(&vm->eviction_lock);
	amdgpu_vm_eviction_unlock(vm);
	return r;
}

@@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
		return false;

	/* Try to block ongoing updates */
	if (!mutex_trylock(&bo_base->vm->eviction_lock))
	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
		return false;

	/* Don't evict VM page tables while they are updated */
	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
		mutex_unlock(&bo_base->vm->eviction_lock);
		amdgpu_vm_eviction_unlock(bo_base->vm);
		return false;
	}

	bo_base->vm->evicting = true;
	mutex_unlock(&bo_base->vm->eviction_lock);
	amdgpu_vm_eviction_unlock(bo_base->vm);
	return true;
}

+5 −1
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/sched/mm.h>

#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -239,9 +240,12 @@ struct amdgpu_vm {
	/* tree of virtual addresses mapped */
	struct rb_root_cached	va;

	/* Lock to prevent eviction while we are updating page tables */
	/* Lock to prevent eviction while we are updating page tables.
	 * Do not take it directly; use amdgpu_vm_eviction_lock(),
	 * amdgpu_vm_eviction_trylock() and amdgpu_vm_eviction_unlock().
	 */
	struct mutex		eviction_lock;
	bool			evicting;
	unsigned int		saved_flags;

	/* BOs who needs a validation */
	struct list_head	evicted;