Commit df95968f authored by Dave Airlie

Merge tag 'amd-drm-next-5.6-2020-01-17' of git://people.freedesktop.org/~agd5f/linux into drm-next



amd-drm-next-5.6-2020-01-17:

amdgpu:
- Fix 32 bit harder
- Powerplay cleanups
- VCN fixes for Arcturus
- RAS fixes
- eDP/DP fixes
- SR-IOV fixes
- Re-enable S/G display for PCO/RV2
- Free stolen memory after init on gmc10
- DF hashing optimizations for Arcturus
- Properly handle runtime pm in sysfs and debugfs
- Unify more GC programming between amdgpu and amdkfd
- Golden settings updates for gfx10
- GDDR6 training fixes
- Freesync fixes
- DSC fixes
- TMDS fixes
- Renoir USB-C fixes
- DC dml updates from hw team
- Pollock support
- Mutex init regression fix

amdkfd:
- Unify more GC programming between amdgpu and amdkfd
- Use KIQ to setup HIQ rather than using MMIO

scheduler:
- Documentation fixes
- Improve job distribution with load sharing

drm:
- DP MST fix

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200117213625.4722-1-alexander.deucher@amd.com
parents 71e72740 7b199143
+4 −25
@@ -90,6 +90,7 @@
#include "amdgpu_mes.h"
#include "amdgpu_umc.h"
#include "amdgpu_mmhub.h"
+#include "amdgpu_df.h"

#define MAX_GPU_INSTANCE		16

@@ -664,29 +665,6 @@ struct amdgpu_mmio_remap {
	resource_size_t bus_addr;
};

-struct amdgpu_df_funcs {
-	void (*sw_init)(struct amdgpu_device *adev);
-	void (*sw_fini)(struct amdgpu_device *adev);
-	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
-				      bool enable);
-	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
-	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
-	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
-						 bool enable);
-	void (*get_clockgating_state)(struct amdgpu_device *adev,
-				      u32 *flags);
-	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
-					    bool enable);
-	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
-					 int is_enable);
-	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
-					 int is_disable);
-	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
-					 uint64_t *count);
-	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
-	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
-			 uint32_t ficadl_val, uint32_t ficadh_val);
-};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
	GC_HWIP = 1,
@@ -930,6 +908,9 @@ struct amdgpu_device {
	bool                            enable_mes;
	struct amdgpu_mes               mes;

+	/* df */
+	struct amdgpu_df                df;
+
	struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
	int				num_ip_blocks;
	struct mutex	mn_lock;
@@ -943,8 +924,6 @@ struct amdgpu_device {
	/* soc15 register offset based on ip, instance and  segment */
	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];

-	const struct amdgpu_df_funcs	*df_funcs;
-
	/* delayed work_func for deferring clockgating during resume */
	struct delayed_work     delayed_init_work;

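Note: the struct amdgpu_df_funcs block removed from this header moves into the new amdgpu_df.h included above, and the bare adev->df_funcs pointer is replaced by the embedded adev->df container added in the hunk above. The new header itself is not part of this excerpt; as a rough sketch only, assuming the container simply wraps the existing function table (the DF hashing work elsewhere in this merge suggests the real header carries additional state):

	/* Sketch, not the actual amdgpu_df.h contents. */
	struct amdgpu_df {
		const struct amdgpu_df_funcs	*funcs;
		/* ... possibly more DF state, e.g. hashing config ... */
	};

Call sites would then reach the table through adev->df.funcs rather than the old adev->df_funcs pointer.
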
+35 −9
@@ -613,12 +613,6 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

-	if (is_support_sw_smu(adev))
-		smu_switch_power_profile(&adev->smu,
-					 PP_SMC_POWER_PROFILE_COMPUTE,
-					 !idle);
-	else if (adev->powerplay.pp_funcs &&
-		 adev->powerplay.pp_funcs->switch_power_profile)
-		amdgpu_dpm_switch_power_profile(adev,
-						PP_SMC_POWER_PROFILE_COMPUTE,
-						!idle);
+	amdgpu_dpm_switch_power_profile(adev,
+					PP_SMC_POWER_PROFILE_COMPUTE,
+					!idle);
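
Note: dropping the open-coded backend check above is safe because amdgpu_dpm_switch_power_profile itself dispatches between the software-SMU and legacy powerplay paths. A simplified sketch of that wrapper, which lives in the powerplay code and may differ from this in detail:

	int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
					    enum PP_SMC_POWER_PROFILE type,
					    bool en)
	{
		int ret = 0;

		/* Same branch the caller used to open-code. */
		if (is_support_sw_smu(adev))
			ret = smu_switch_power_profile(&adev->smu, type, en);
		else if (adev->powerplay.pp_funcs &&
			 adev->powerplay.pp_funcs->switch_power_profile)
			ret = adev->powerplay.pp_funcs->switch_power_profile(
				adev->powerplay.pp_handle, type, en);

		return ret;
	}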
@@ -634,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
	return false;
}

+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	if (adev->family == AMDGPU_FAMILY_AI) {
+		int i;
+
+		for (i = 0; i < adev->num_vmhubs; i++)
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+	} else {
+		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+	}
+
+	return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint32_t flush_type = 0;
+	bool all_hub = false;
+
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;
+
+	if (adev->family == AMDGPU_FAMILY_AI)
+		all_hub = true;
+
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
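
Note: the two helpers added above give amdkfd one family-agnostic entry point for TLB invalidation; the per-ASIC invalidate_tlbs/invalidate_tlbs_vmid hooks are deleted from the kfd2kgd tables later in this diff. Vega-family parts (AMDGPU_FAMILY_AI) flush every VM hub, other ASICs only the GFX hub, and XGMI-connected Vega20 asks for flush type 2, the heavier-weight invalidation. A minimal caller-side sketch (the function name here is hypothetical):

	/* Hypothetical call site: flush a process's stale translations,
	 * identified by PASID, after its mappings change. */
	static void example_flush_after_unmap(struct kgd_dev *kgd,
					      uint16_t pasid)
	{
		int r = amdgpu_amdkfd_flush_gpu_tlb_pasid(kgd, pasid);

		if (r)
			pr_err("TLB flush for pasid %u failed (%d)\n",
			       pasid, r);
	}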
+2 −0
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

+47 −24
@@ -71,32 +71,56 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
-	uint32_t sdma_engine_reg_base[8] = {
-		SOC15_REG_OFFSET(SDMA0, 0,
-				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA1, 0,
-				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA2, 0,
-				 mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA3, 0,
-				 mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA4, 0,
-				 mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA5, 0,
-				 mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA6, 0,
-				 mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
-		SOC15_REG_OFFSET(SDMA7, 0,
-				 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
-	};
+	uint32_t sdma_engine_reg_base = 0;
+	uint32_t sdma_rlc_reg_offset;
+
+	switch (engine_id) {
+	default:
+		dev_warn(adev->dev,
+			 "Invalid sdma engine id (%d), using engine id 0\n",
+			 engine_id);
+		/* fall through */
+	case 0:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+		break;
+	case 1:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+		break;
+	case 2:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+		break;
+	case 3:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+		break;
+	case 4:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+		break;
+	case 5:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+		break;
+	case 6:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+		break;
+	case 7:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+		break;
+	}

-	uint32_t retval = sdma_engine_reg_base[engine_id]
+	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
-			queue_id, retval);
+			queue_id, sdma_rlc_reg_offset);

-	return retval;
+	return sdma_rlc_reg_offset;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
@@ -281,6 +305,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -296,7 +321,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
-	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
-	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
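
Note: SOC15_REG_OFFSET() expands to a runtime lookup in adev->reg_offset[...], so the eight-entry array removed above was not a compile-time constant and had to be rebuilt on the stack on every call; the switch computes only the one base actually needed, and its default case warns about an out-of-range engine id and falls through to engine 0 instead of indexing past the array. The register arithmetic is unchanged; as a short illustration of the layout it relies on:

	/* Illustration: RLC queue register blocks sit at a fixed stride,
	 * so queue N's registers are the engine base plus N times the
	 * RLC0->RLC1 distance. */
	uint32_t stride = mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL;
	uint32_t queue_base = sdma_engine_reg_base + queue_id * stride;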
+61 −88
@@ -107,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
-	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
-			    queue_id) & 31;
+	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			queue_id;

-	return ((uint32_t)1) << bit;
+	return 1ull << bit;
}

static void release_queue(struct kgd_dev *kgd)
@@ -268,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(kgd, pipe_id, queue_id);

-	/* HIQ is set during driver init period with vmid set to 0*/
-	if (m->cp_hqd_vmid == 0) {
-		uint32_t value, mec, pipe;
-
-		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
-		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-			mec, pipe, queue_id);
-		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-	}
-
	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -332,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
		       lower_32_bits((uint64_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uint64_t)wptr));
-		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-		       get_queue_mask(adev, pipe_id, queue_id));
+		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
@@ -350,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
	return 0;
}

+static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+			    uint32_t pipe_id, uint32_t queue_id,
+			    uint32_t doorbell_off)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	struct v10_compute_mqd *m;
+	uint32_t mec, pipe;
+	int r;
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+		 mec, pipe, queue_id);
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	r = amdgpu_ring_alloc(kiq_ring, 7);
+	if (r) {
+		pr_err("Failed to alloc KIQ (%d).\n", r);
+		goto out_unlock;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+			  PACKET3_MAP_QUEUES_PIPE(pipe) |
+			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+	amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+	release_queue(kgd);
+
+	return r;
+}
+
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
@@ -686,71 +725,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
-	signed long r;
-	uint32_t seq;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	spin_lock(&adev->gfx.kiq.ring_lock);
-	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-	amdgpu_ring_write(ring,
-			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-			PACKET3_INVALIDATE_TLBS_PASID(pasid));
-	amdgpu_fence_emit_polling(ring, &seq);
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq.ring_lock);
-
-	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	int vmid;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
-
-	for (vmid = 0; vmid < 16; vmid++) {
-		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-			continue;
-
-		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
-				&queried_pasid);
-		if (ret	&& queried_pasid == pasid) {
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-					AMDGPU_GFXHUB_0, 0);
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
-		pr_err("non kfd vmid %d\n", vmid);
-		return 0;
-	}
-
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-	return 0;
-}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
@@ -817,6 +791,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
+	.hiq_mqd_load = kgd_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -832,7 +807,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
			get_atc_vmid_pasid_mapping_info,
	.get_tile_config = amdgpu_amdkfd_get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
-	.invalidate_tlbs = invalidate_tlbs,
-	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
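
Note: kgd_hiq_mqd_load above implements the changelog item "Use KIQ to setup HIQ rather than using MMIO": instead of programming RLC_CP_SCHEDULERS with direct register writes (the block removed from kgd_hqd_load earlier in this file), the HIQ is now mapped by submitting a MAP_QUEUES packet on the KIQ ring, letting CP firmware do the mapping. The 7-dword ring reservation matches the packet size:

	/* The submission pattern used above: PACKET3(PACKET3_MAP_QUEUES, 5)
	 * is the header dword, and its count field of 5 means 6 payload
	 * dwords follow -- 7 dwords total, matching amdgpu_ring_alloc(). */
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (!r) {
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* ... six payload dwords describing the queue ... */
		amdgpu_ring_commit(kiq_ring);
	}

The invalidate_tlbs/invalidate_tlbs_vmid hooks deleted from this table (and from the arcturus one) are superseded by the common amdgpu_amdkfd_flush_gpu_tlb_vmid/_pasid helpers added earlier in this merge.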