drm/amdgpu: Track pending retry faults in IH and VM (v2) (a2f14820) · Commits · 戴 / test

drivers/gpu/drm/Kconfig

+1 −0

Original line number	Diff line number	Diff line
		@@ -184,6 +184,7 @@ config DRM_AMDGPU
		select BACKLIGHT_CLASS_DEVICE
		select BACKLIGHT_LCD_SUPPORT
		select INTERVAL_TREE
		select CHASH
		help
		Choose this option if you have a recent AMD Radeon graphics card.

drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c

+76 −0

Original line number	Diff line number	Diff line
		@@ -196,3 +196,79 @@ restart_ih:

		return IRQ_HANDLED;
		}

		/**
		 * amdgpu_ih_add_fault - Record a pending retry page fault
		 *
		 * @adev: amdgpu device pointer
		 * @key: 64-bit encoding of PASID and address
		 *
		 * Call this when a retry page fault interrupt arrives. A fault that
		 * has not been seen before is inserted into the per-IH hash table
		 * and counted; a fault that is already in the table is reported as
		 * known so the caller can tell it is already being handled.
		 *
		 * The table is only allowed to fill up to half of its capacity.
		 * Once that limit is reached no lookup is attempted and the call
		 * fails; retry interrupts should be ignored until entries have
		 * been cleared again.
		 *
		 * The fault table is expected to have been allocated in
		 * <IP>_ih_sw_init on GPUs that support retry faults and require
		 * retry filtering. If it is missing, a one-time warning is raised
		 * and -ENOSPC is returned.
		 *
		 * Returns 0 if the fault was added, 1 if the fault was already known,
		 * -ENOSPC if there are too many pending faults.
		 */
		int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
		{
			unsigned long irq_state;
			int ret = -ENOSPC;

			if (WARN_ON_ONCE(!adev->irq.ih.faults))
				return ret;

			spin_lock_irqsave(&adev->irq.ih.faults->lock, irq_state);

			/* Keep the hash table at most 50% full for best performance */
			if (adev->irq.ih.faults->count <
			    (1 << (AMDGPU_PAGEFAULT_HASH_BITS - 1))) {
				ret = chash_table_copy_in(&adev->irq.ih.faults->hash,
							  key, NULL);
				if (ret == 0)
					adev->irq.ih.faults->count++;

				/* A negative result here means the count is out of sync */
				WARN_ON_ONCE(ret < 0);
			}

			spin_unlock_irqrestore(&adev->irq.ih.faults->lock, irq_state);
			return ret;
		}

		/**
		 * amdgpu_ih_clear_fault - Forget a pending retry page fault
		 *
		 * @adev: amdgpu device pointer
		 * @key: 64-bit encoding of PASID and address
		 *
		 * Call this once a page fault has been handled. The key is removed
		 * from the fault hash table and the pending count is decremented,
		 * so a later interrupt carrying the same key is treated as a new
		 * page fault.
		 *
		 * Does nothing when the device has no fault table. A failed removal
		 * or a count that drops below zero raises a one-time warning.
		 */
		void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
		{
			unsigned long irq_state;
			int ret;

			if (!adev->irq.ih.faults)
				return;

			spin_lock_irqsave(&adev->irq.ih.faults->lock, irq_state);

			ret = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
			if (WARN_ON_ONCE(ret < 0))
				goto out_unlock;

			/* Removal succeeded, so one fewer fault is pending */
			adev->irq.ih.faults->count--;
			WARN_ON_ONCE(adev->irq.ih.faults->count < 0);

		out_unlock:
			spin_unlock_irqrestore(&adev->irq.ih.faults->lock, irq_state);
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

+12 −0

Original line number	Diff line number	Diff line
		@@ -24,6 +24,8 @@
		#ifndef __AMDGPU_IH_H__
		#define __AMDGPU_IH_H__

		#include <linux/chash.h>

		struct amdgpu_device;
		/*
		* vega10+ IH clients
		@@ -69,6 +71,13 @@ enum amdgpu_ih_clientid

		#define AMDGPU_IH_CLIENTID_LEGACY 0

		/*
		 * Size parameter of the retry-fault hash table. amdgpu_ih_add_fault()
		 * refuses new entries once 1 << (AMDGPU_PAGEFAULT_HASH_BITS - 1) are
		 * pending, which it describes as 50% of the table.
		 */
		#define AMDGPU_PAGEFAULT_HASH_BITS 8
		/*
		 * Bookkeeping for retry page faults that are currently being handled.
		 * Entries are keyed by the 64-bit PASID/address encoding passed to
		 * amdgpu_ih_add_fault() and amdgpu_ih_clear_fault().
		 */
		struct amdgpu_retryfault_hashtable {
		/* NOTE(review): the trailing 8 and 0 are presumably key size and
		 * value size in bytes; confirm against linux/chash.h
		 */
		DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
		/* Taken with interrupts disabled around every access to hash and count */
		spinlock_t lock;
		/* Number of faults currently stored in hash */
		int count;
		};

		/*
		* R6xx+ IH ring
		*/
		@@ -87,6 +96,7 @@ struct amdgpu_ih_ring {
		bool use_doorbell;
		bool use_bus_addr;
		dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
		struct amdgpu_retryfault_hashtable *faults;
		};

		#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
		@@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
		bool use_bus_addr);
		void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
		int amdgpu_ih_process(struct amdgpu_device *adev);
		/* Record a retry fault key: 0 if newly added, 1 if already known,
		 * -ENOSPC if too many faults are pending
		 */
		int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
		/* Drop a fault key once the corresponding page fault has been handled */
		void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);

		#endif

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

+7 −0

Original line number	Diff line number	Diff line
		@@ -2680,6 +2680,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		vm->pasid = pasid;
		}

		INIT_KFIFO(vm->faults);

		return 0;

		error_free_root:
		@@ -2731,8 +2733,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
		{
		struct amdgpu_bo_va_mapping mapping, tmp;
		bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
		u64 fault;
		int i;

		/* Clear pending page faults from IH when the VM is destroyed */
		while (kfifo_get(&vm->faults, &fault))
		amdgpu_ih_clear_fault(adev, fault);

		if (vm->pasid) {
		unsigned long flags;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -120,6 +120,10 @@ struct amdgpu_vm_pt {
		unsigned last_entry_used;
		};

		/*
		 * Helpers for the 64-bit page fault key.
		 *
		 * AMDGPU_VM_FAULT() places the PASID in bits 63:48 and ORs in the
		 * address unchanged. The address is not masked, so it must not have
		 * bits set at or above bit 48 or they will alias into the PASID.
		 * AMDGPU_VM_FAULT_PASID() extracts bits 63:48 of a key.
		 * AMDGPU_VM_FAULT_ADDR() extracts bits 47:12 of a key, i.e. the
		 * address with its low 12 bits cleared.
		 */
		#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
		#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
		#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)

		struct amdgpu_vm {
		/* tree of virtual addresses mapped */
		struct rb_root va;
		@@ -160,6 +164,9 @@ struct amdgpu_vm {

		/* Flag to indicate ATS support from PTE for GFX9 */
		bool pte_support_ats;

		/* Up to 128 pending page faults */
		DECLARE_KFIFO(faults, u64, 128);
		};

		struct amdgpu_vm_id {

Admin message