Commit 898c2cb5 authored by Christian König's avatar Christian König Committed by Alex Deucher
Browse files

drm/amdgpu: use scheduler fault instead of reset work



Signal a fault to the scheduler on an illegal instruction or register
access violation instead of kicking of the reset handler directly.

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Acked-by: default avatarAndrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 2c498d1d
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -830,7 +830,6 @@ struct amdgpu_device {
	bool				need_dma32;
	bool				need_swiotlb;
	bool				accel_working;
	struct work_struct		reset_work;
	struct notifier_block		acpi_nb;
	struct amdgpu_i2c_chan		*i2c_bus[AMDGPU_MAX_I2C_BUS];
	struct amdgpu_debugfs		debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
+0 −21
Original line number Diff line number Diff line
@@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
	drm_helper_hpd_irq_event(dev);
}

/**
 * amdgpu_irq_reset_work_func - execute GPU reset
 *
 * @work: work struct pointer
 *
 * Execute scheduled GPU reset (Cayman+).
 * This function is called when the IRQ handler thinks we need a GPU reset.
 */
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
						  reset_work);

	if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

/**
 * amdgpu_irq_disable_all - disable *all* interrupts
 *
@@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
				amdgpu_hotplug_work_func);
	}

	INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);

	adev->irq.installed = true;
	r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
	if (r) {
		adev->irq.installed = false;
		if (!amdgpu_device_has_dc_support(adev))
			flush_work(&adev->hotplug_work);
		cancel_work_sync(&adev->reset_work);
		return r;
	}
	adev->ddev->max_vblank_count = 0x00ffffff;
@@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
			pci_disable_msi(adev->pdev);
		if (!amdgpu_device_has_dc_support(adev))
			flush_work(&adev->hotplug_work);
		cancel_work_sync(&adev->reset_work);
	}

	for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+4 −1
Original line number Diff line number Diff line
@@ -1214,8 +1214,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     struct amdgpu_iv_entry *entry)
{
	u8 instance_id;

	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	instance_id = (entry->ring_id & 0x3) >> 0;
	drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
	return 0;
}

+21 −2
Original line number Diff line number Diff line
@@ -3393,12 +3393,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
	return 0;
}

static void gfx_v6_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	struct amdgpu_ring *ring;

	switch (entry->ring_id) {
	case 0:
		ring = &adev->gfx.gfx_ring[0];
		break;
	case 1:
	case 2:
		ring = &adev->gfx.compute_ring[entry->ring_id - 1];
		break;
	default:
		return;
	}
	drm_sched_fault(&ring->sched);
}

static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	gfx_v6_0_fault(adev, entry);
	return 0;
}

@@ -3407,7 +3426,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	gfx_v6_0_fault(adev, entry);
	return 0;
}

+26 −2
Original line number Diff line number Diff line
@@ -4959,12 +4959,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
	return 0;
}

static void gfx_v7_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	struct amdgpu_ring *ring;
	u8 me_id, pipe_id;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if ((ring->me == me_id) && (ring->pipe == pipe_id))
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	gfx_v7_0_fault(adev, entry);
	return 0;
}

@@ -4974,7 +4998,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
{
	DRM_ERROR("Illegal instruction in command stream\n");
	// XXX soft reset the gfx block only
	schedule_work(&adev->reset_work);
	gfx_v7_0_fault(adev, entry);
	return 0;
}

Loading