Commit 56fc40ab authored by Yong Zhao's avatar Yong Zhao Committed by Alex Deucher
Browse files

drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid



get_atc_vmid_pasid_mapping_valid() is very similar to
get_atc_vmid_pasid_mapping_pasid(), so they can be merged into a new
function get_atc_vmid_pasid_mapping_info() to reduce register access
times. More importantly, getting the PASID and the valid bit atomically
with a single read fixes some potential race conditions where the
mapping changes between the two reads.

Signed-off-by: default avatarYong Zhao <Yong.Zhao@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3fe023d4
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -278,10 +278,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
+19 −30
Original line number Diff line number Diff line
@@ -98,10 +98,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
@@ -155,10 +153,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.get_atc_vmid_pasid_mapping_info =
			get_atc_vmid_pasid_mapping_info,
	.get_tile_config = amdgpu_amdkfd_get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.invalidate_tlbs = invalidate_tlbs,
@@ -775,26 +771,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t reg;
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
@@ -826,6 +813,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;

	if (amdgpu_emu_mode == 0 && ring->sched.ready)
@@ -834,15 +823,15 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;
		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
				== pasid) {

		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
				&queried_pasid);
		if (ret	&& queried_pasid == pasid) {
			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
					AMDGPU_GFXHUB_0, 0);
			break;
		}
	}
	}

	return 0;
}
+9 −19
Original line number Diff line number Diff line
@@ -133,9 +133,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid);
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid);

static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
@@ -186,8 +185,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -753,24 +751,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t reg;
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
+10 −22
Original line number Diff line number Diff line
@@ -89,10 +89,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
@@ -141,10 +139,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.get_atc_vmid_pasid_mapping_info =
			get_atc_vmid_pasid_mapping_info,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -667,24 +663,16 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t reg;
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
+18 −27
Original line number Diff line number Diff line
@@ -612,26 +612,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
	return 0;
}

bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t reg;
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
@@ -666,6 +657,8 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid, i;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	uint32_t flush_type = 0;

@@ -681,16 +674,16 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;
		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
				== pasid) {

		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
				&queried_pasid);
		if (ret && queried_pasid == pasid) {
			for (i = 0; i < adev->num_vmhubs; i++)
				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
							i, flush_type);
			break;
		}
	}
	}

	return 0;
}
@@ -813,10 +806,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
Loading