Commit fa739f4b authored by James Zhu, committed by Alex Deucher
Browse files

drm/amdgpu: add multiple instances support for Arcturus



Arcturus has dual VCN instances, so multiple-instance support needs to be added for Arcturus.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c01b6a1d
Loading
Loading
Loading
Loading
+13 −7
Original line number Diff line number Diff line
@@ -408,23 +408,29 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
		break;
	case AMDGPU_HW_IP_VCN_DEC:
		type = AMD_IP_BLOCK_TYPE_VCN;
		if (adev->vcn.inst[0].ring_dec.sched.ready)
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.inst[i].ring_dec.sched.ready)
				++num_rings;
		}
		ib_start_alignment = 16;
		ib_size_alignment = 16;
		break;
	case AMDGPU_HW_IP_VCN_ENC:
		type = AMD_IP_BLOCK_TYPE_VCN;
		for (i = 0; i < adev->vcn.num_enc_rings; i++)
			if (adev->vcn.inst[0].ring_enc[i].sched.ready)
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			for (j = 0; j < adev->vcn.num_enc_rings; j++)
				if (adev->vcn.inst[i].ring_enc[j].sched.ready)
					++num_rings;
		}
		ib_start_alignment = 64;
		ib_size_alignment = 1;
		break;
	case AMDGPU_HW_IP_VCN_JPEG:
		type = AMD_IP_BLOCK_TYPE_VCN;
		if (adev->vcn.inst[0].ring_jpeg.sched.ready)
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.inst[i].ring_jpeg.sched.ready)
				++num_rings;
		}
		ib_start_alignment = 16;
		ib_size_alignment = 16;
		break;
+89 −77
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int r;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

@@ -146,13 +146,16 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[0].vcpu_bo,
				    &adev->vcn.inst[0].gpu_addr, &adev->vcn.inst[0].cpu_addr);
						AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
						&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}
	}

	if (adev->vcn.indirect_sram) {
		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
@@ -169,9 +172,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kvfree(adev->vcn.inst[0].saved_bo);
	int i, j;

	if (adev->vcn.indirect_sram) {
		amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
@@ -179,16 +180,20 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
				      (void **)&adev->vcn.dpg_sram_cpu_addr);
	}

	amdgpu_bo_free_kernel(&adev->vcn.inst[0].vcpu_bo,
			      &adev->vcn.inst[0].gpu_addr,
			      (void **)&adev->vcn.inst[0].cpu_addr);
	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
					  &adev->vcn.inst[j].gpu_addr,
					  (void **)&adev->vcn.inst[j].cpu_addr);

	amdgpu_ring_fini(&adev->vcn.inst[0].ring_dec);
		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.inst[0].ring_enc[i]);
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);

	amdgpu_ring_fini(&adev->vcn.inst[0].ring_jpeg);
		amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg);
	}

	release_firmware(adev->vcn.fw);

@@ -199,21 +204,23 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (adev->vcn.inst[0].vcpu_bo == NULL)
	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

	size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo);
	ptr = adev->vcn.inst[0].cpu_addr;
		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

	adev->vcn.inst[0].saved_bo = kvmalloc(size, GFP_KERNEL);
	if (!adev->vcn.inst[0].saved_bo)
		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

	memcpy_fromio(adev->vcn.inst[0].saved_bo, ptr, size);

		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
	}
	return 0;
}

@@ -221,17 +228,19 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	if (adev->vcn.inst[0].vcpu_bo == NULL)
	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo);
	ptr = adev->vcn.inst[0].cpu_addr;
		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

	if (adev->vcn.inst[0].saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.inst[0].saved_bo, size);
		kvfree(adev->vcn.inst[0].saved_bo);
		adev->vcn.inst[0].saved_bo = NULL;
		if (adev->vcn.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;
@@ -239,14 +248,14 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
			memcpy_toio(adev->vcn.inst[0].cpu_addr, adev->vcn.fw->data + offset,
				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}

	}
	return 0;
}

@@ -254,22 +263,23 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0;
	unsigned int i;
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]);
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
			struct dpg_pause_state new_state;

		if (fences)
			if (fence[j])
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg))
			if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg))
				new_state.jpeg = VCN_DPG_STATE__PAUSE;
			else
				new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
@@ -277,8 +287,10 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
			adev->vcn.pause_dpg_mode(adev, &new_state);
		}

	fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg);
	fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_dec);
		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg);
		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (fences == 0) {
		amdgpu_gfx_off_ctrl(adev, true);
@@ -312,14 +324,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
		unsigned int i;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]);
			fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
		}
		if (fences)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else
			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg))
		if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))
			new_state.jpeg = VCN_DPG_STATE__PAUSE;
		else
			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
@@ -345,7 +357,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
	unsigned i;
	int r;

	WREG32(adev->vcn.inst[0].external.scratch9, 0xCAFEDEAD);
	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
@@ -353,7 +365,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[0].external.scratch9);
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
@@ -664,7 +676,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
	unsigned i;
	int r;

	WREG32(adev->vcn.inst[0].external.jpeg_pitch, 0xCAFEDEAD);
	WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
@@ -674,7 +686,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch);
		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
@@ -748,7 +760,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch);
		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
+635 −543

File changed.

Preview size limit exceeded, changes collapsed.