Commit 8a905236, authored by Jesse Barnes, committed by Eric Anholt
Browse files

drm/i915: refactor error detection & collection



This patch refactors the existing error detection and collection code,
placing most of it in i915_handle_error(). Additionally, we introduce a
work queue for scheduling post-crash tasks such as generating a uevent.
Using the uevent facility, userspace should be able to capture a
post-mortem dump for diagnostics.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
parent 832cc28d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -229,6 +229,7 @@ typedef struct drm_i915_private {

	spinlock_t error_lock;
	struct drm_i915_error_state *first_error;
	struct work_struct error_work;

	/* Register state */
	u8 saveLBB;
+2 −0
Original line number Diff line number Diff line
@@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused)

	error = dev_priv->first_error;

	seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
		   error->time.tv_usec);
	seq_printf(m, "EIR: 0x%08x\n", error->eir);
	seq_printf(m, "  PGTBL_ER: 0x%08x\n", error->pgtbl_er);
	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
+158 −74
Original line number Diff line number Diff line
@@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
	return ret;
}

/**
 * i915_error_work_func - do process context error handling work
 * @work: work struct
 *
 * Fire an error uevent so userspace can see that a hang or error
 * was detected.
 */
static void i915_error_work_func(struct work_struct *work)
{
	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
						    error_work);
	struct drm_device *dev = dev_priv->dev;
	char *event_string = "ERROR=1";
	char *envp[] = { event_string, NULL };

	DRM_DEBUG("generating error event\n");

	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
}

/**
 * i915_capture_error_state - capture an error record for later analysis
 * @dev: drm device
 *
 * Should be called when an error is detected (either a hang or an error
 * interrupt) to capture error state from the time of the error.  Fills
 * out a structure which becomes available in debugfs for user level tools
 * to pick up.
 */
static void i915_capture_error_state(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev)
		error->acthd = I915_READ(ACTHD_I965);
	}

	do_gettimeofday(&error->time);

	dev_priv->first_error = error;

out:
	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
}

/**
 * i915_handle_error - handle an error interrupt
 * @dev: drm device
 *
 * Do some basic checking of regsiter state at error interrupt time and
 * dump it to the syslog.  Also call i915_capture_error_state() to make
 * sure we get a record and make it available in debugfs.  Fire a uevent
 * so userspace knows something bad happened (should trigger collection
 * of a ring dump etc.).
 */
static void i915_handle_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 eir = I915_READ(EIR);
	u32 pipea_stats = I915_READ(PIPEASTAT);
	u32 pipeb_stats = I915_READ(PIPEBSTAT);

	i915_capture_error_state(dev);

	printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
	       eir);

	if (IS_G4X(dev)) {
		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
			u32 ipeir = I915_READ(IPEIR_I965);

			printk(KERN_ERR "  IPEIR: 0x%08x\n",
			       I915_READ(IPEIR_I965));
			printk(KERN_ERR "  IPEHR: 0x%08x\n",
			       I915_READ(IPEHR_I965));
			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
			       I915_READ(INSTDONE_I965));
			printk(KERN_ERR "  INSTPS: 0x%08x\n",
			       I915_READ(INSTPS));
			printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
			       I915_READ(INSTDONE1));
			printk(KERN_ERR "  ACTHD: 0x%08x\n",
			       I915_READ(ACTHD_I965));
			I915_WRITE(IPEIR_I965, ipeir);
			(void)I915_READ(IPEIR_I965);
		}
		if (eir & GM45_ERROR_PAGE_TABLE) {
			u32 pgtbl_err = I915_READ(PGTBL_ER);
			printk(KERN_ERR "page table error\n");
			printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
			       pgtbl_err);
			I915_WRITE(PGTBL_ER, pgtbl_err);
			(void)I915_READ(PGTBL_ER);
		}
	}

	if (IS_I9XX(dev)) {
		if (eir & I915_ERROR_PAGE_TABLE) {
			u32 pgtbl_err = I915_READ(PGTBL_ER);
			printk(KERN_ERR "page table error\n");
			printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
			       pgtbl_err);
			I915_WRITE(PGTBL_ER, pgtbl_err);
			(void)I915_READ(PGTBL_ER);
		}
	}

	if (eir & I915_ERROR_MEMORY_REFRESH) {
		printk(KERN_ERR "memory refresh error\n");
		printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
		       pipea_stats);
		printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
		       pipeb_stats);
		/* pipestat has already been acked */
	}
	if (eir & I915_ERROR_INSTRUCTION) {
		printk(KERN_ERR "instruction error\n");
		printk(KERN_ERR "  INSTPM: 0x%08x\n",
		       I915_READ(INSTPM));
		if (!IS_I965G(dev)) {
			u32 ipeir = I915_READ(IPEIR);

			printk(KERN_ERR "  IPEIR: 0x%08x\n",
			       I915_READ(IPEIR));
			printk(KERN_ERR "  IPEHR: 0x%08x\n",
			       I915_READ(IPEHR));
			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
			       I915_READ(INSTDONE));
			printk(KERN_ERR "  ACTHD: 0x%08x\n",
			       I915_READ(ACTHD));
			I915_WRITE(IPEIR, ipeir);
			(void)I915_READ(IPEIR);
		} else {
			u32 ipeir = I915_READ(IPEIR_I965);

			printk(KERN_ERR "  IPEIR: 0x%08x\n",
			       I915_READ(IPEIR_I965));
			printk(KERN_ERR "  IPEHR: 0x%08x\n",
			       I915_READ(IPEHR_I965));
			printk(KERN_ERR "  INSTDONE: 0x%08x\n",
			       I915_READ(INSTDONE_I965));
			printk(KERN_ERR "  INSTPS: 0x%08x\n",
			       I915_READ(INSTPS));
			printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
			       I915_READ(INSTDONE1));
			printk(KERN_ERR "  ACTHD: 0x%08x\n",
			       I915_READ(ACTHD_I965));
			I915_WRITE(IPEIR_I965, ipeir);
			(void)I915_READ(IPEIR_I965);
		}
	}

	I915_WRITE(EIR, eir);
	(void)I915_READ(EIR);
	eir = I915_READ(EIR);
	if (eir) {
		/*
		 * some errors might have become stuck,
		 * mask them.
		 */
		DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
		I915_WRITE(EMR, I915_READ(EMR) | eir);
		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
	}

	schedule_work(&dev_priv->error_work);
}

irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
{
	struct drm_device *dev = (struct drm_device *) arg;
@@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
		pipea_stats = I915_READ(PIPEASTAT);
		pipeb_stats = I915_READ(PIPEBSTAT);

		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
			i915_handle_error(dev);

		/*
		 * Clear the PIPE(A|B)STAT regs before the IIR
		 */
@@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
			I915_READ(PORT_HOTPLUG_STAT);
		}

		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) {
			u32 eir = I915_READ(EIR);

			i915_capture_error_state(dev);

			printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
			       eir);
			if (eir & I915_ERROR_PAGE_TABLE) {
				u32 pgtbl_err = I915_READ(PGTBL_ER);
				printk(KERN_ERR "page table error\n");
				printk(KERN_ERR "  PGTBL_ER: 0x%08x\n",
				       pgtbl_err);
				I915_WRITE(PGTBL_ER, pgtbl_err);
				(void)I915_READ(PGTBL_ER);
			}
			if (eir & I915_ERROR_MEMORY_REFRESH) {
				printk(KERN_ERR "memory refresh error\n");
				printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
				       pipea_stats);
				printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
				       pipeb_stats);
				/* pipestat has already been acked */
			}
			if (eir & I915_ERROR_INSTRUCTION) {
				printk(KERN_ERR "instruction error\n");
				printk(KERN_ERR "  INSTPM: 0x%08x\n",
				       I915_READ(INSTPM));
				if (!IS_I965G(dev)) {
					u32 ipeir = I915_READ(IPEIR);

					printk(KERN_ERR "  IPEIR: 0x%08x\n",
					       I915_READ(IPEIR));
					printk(KERN_ERR "  IPEHR: 0x%08x\n",
						   I915_READ(IPEHR));
					printk(KERN_ERR "  INSTDONE: 0x%08x\n",
						   I915_READ(INSTDONE));
					printk(KERN_ERR "  ACTHD: 0x%08x\n",
						   I915_READ(ACTHD));
					I915_WRITE(IPEIR, ipeir);
					(void)I915_READ(IPEIR);
				} else {
					u32 ipeir = I915_READ(IPEIR_I965);

					printk(KERN_ERR "  IPEIR: 0x%08x\n",
					       I915_READ(IPEIR_I965));
					printk(KERN_ERR "  IPEHR: 0x%08x\n",
					       I915_READ(IPEHR_I965));
					printk(KERN_ERR "  INSTDONE: 0x%08x\n",
					       I915_READ(INSTDONE_I965));
					printk(KERN_ERR "  INSTPS: 0x%08x\n",
					       I915_READ(INSTPS));
					printk(KERN_ERR "  INSTDONE1: 0x%08x\n",
					       I915_READ(INSTDONE1));
					printk(KERN_ERR "  ACTHD: 0x%08x\n",
					       I915_READ(ACTHD_I965));
					I915_WRITE(IPEIR_I965, ipeir);
					(void)I915_READ(IPEIR_I965);
				}
			}

			I915_WRITE(EIR, eir);
			(void)I915_READ(EIR);
			eir = I915_READ(EIR);
			if (eir) {
				/*
				 * some errors might have become stuck,
				 * mask them.
				 */
				DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
				I915_WRITE(EMR, I915_READ(EMR) | eir);
				I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
			}
		}

		I915_WRITE(IIR, iir);
		new_iir = I915_READ(IIR); /* Flush posted writes */

@@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
	atomic_set(&dev_priv->irq_received, 0);

	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
	INIT_WORK(&dev_priv->error_work, i915_error_work_func);

	if (IS_IGDNG(dev)) {
		igdng_irq_preinstall(dev);