Commit 35a85ac6 authored by Ben Widawsky's avatar Ben Widawsky Committed by Daniel Vetter
Browse files

drm/i915: Add second slice l3 remapping



Certain HSW SKUs have a second bank of L3. This L3 remapping has a
separate register set, and interrupt from the first "slice". A slice is
simply a term to define some subset of the GPU's l3 cache. This patch
implements both the interrupt handler, and ability to communicate with
userspace about this second slice.

v2:  Remove redundant check about non-existent slice.
Change warning about interrupts of unknown slices to WARN_ON_ONCE
Handle the case where we get 2 slice interrupts concurrently, and switch
the tracking of interrupts to be non-destructive (all Ville)
Don't enable/mask the second slice parity interrupt for ivb/vlv (even
though all docs I can find claim it's rsvd) (Ville + Bryan)
Keep BYT excluded from L3 parity

v3: Fix the slice = ffs to be decremented by one (found by Ville). When
I initially did my testing on the series, I was using 1-based slice
counting, so this code was correct. Not sure why my simpler tests that
I've been running since then didn't pick it up sooner.

Reviewed-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: default avatarBen Widawsky <ben@bwidawsk.net>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 1c966dd2
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -917,9 +917,11 @@ struct i915_ums_state {
	int mm_suspended;
};

#define MAX_L3_SLICES 2
struct intel_l3_parity {
	u32 *remap_info;
	u32 *remap_info[MAX_L3_SLICES];
	struct work_struct error_work;
	int which_slice;
};

struct i915_gem_mm {
@@ -1686,6 +1688,7 @@ struct drm_i915_file_private {
#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)

#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev))

#define GT_FREQUENCY_MULTIPLIER 50

@@ -1946,7 +1949,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
void i915_gem_l3_remap(struct drm_device *dev);
void i915_gem_l3_remap(struct drm_device *dev, int slice);
void i915_gem_init_swizzling(struct drm_device *dev);
void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
int __must_check i915_gpu_idle(struct drm_device *dev);
+13 −13
Original line number Diff line number Diff line
@@ -4222,16 +4222,15 @@ i915_gem_idle(struct drm_device *dev)
	return 0;
}

void i915_gem_l3_remap(struct drm_device *dev)
void i915_gem_l3_remap(struct drm_device *dev, int slice)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
	u32 misccpctl;
	int i;

	if (!HAS_L3_GPU_CACHE(dev))
		return;

	if (!dev_priv->l3_parity.remap_info)
	if (!HAS_L3_GPU_CACHE(dev) || !remap_info)
		return;

	misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -4239,17 +4238,17 @@ void i915_gem_l3_remap(struct drm_device *dev)
	POSTING_READ(GEN7_MISCCPCTL);

	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
		u32 remap = I915_READ(reg_base + i);
		if (remap && remap != remap_info[i/4])
			DRM_DEBUG("0x%x was already programmed to %x\n",
				  GEN7_L3LOG_BASE + i, remap);
		if (remap && !dev_priv->l3_parity.remap_info[i/4])
				  reg_base + i, remap);
		if (remap && !remap_info[i/4])
			DRM_DEBUG_DRIVER("Clearing remapped register\n");
		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
		I915_WRITE(reg_base + i, remap_info[i/4]);
	}

	/* Make sure all the writes land before disabling dop clock gating */
	POSTING_READ(GEN7_L3LOG_BASE);
	POSTING_READ(reg_base);

	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
}
@@ -4343,7 +4342,7 @@ int
i915_gem_init_hw(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
	int ret, i;

	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;
@@ -4362,7 +4361,8 @@ i915_gem_init_hw(struct drm_device *dev)
		I915_WRITE(GEN7_MSG_CTL, temp);
	}

	i915_gem_l3_remap(dev);
	for (i = 0; i < NUM_L3_SLICES(dev); i++)
		i915_gem_l3_remap(dev, i);

	i915_gem_init_swizzling(dev);

+58 −31
Original line number Diff line number Diff line
@@ -888,9 +888,10 @@ static void ivybridge_parity_work(struct work_struct *work)
	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
						    l3_parity.error_work);
	u32 error_status, row, bank, subbank;
	char *parity_event[5];
	char *parity_event[6];
	uint32_t misccpctl;
	unsigned long flags;
	uint8_t slice = 0;

	/* We must turn off DOP level clock gating to access the L3 registers.
	 * In order to prevent a get/put style interface, acquire struct mutex
@@ -898,45 +899,64 @@ static void ivybridge_parity_work(struct work_struct *work)
	 */
	mutex_lock(&dev_priv->dev->struct_mutex);

	/* If we've screwed up tracking, just let the interrupt fire again */
	if (WARN_ON(!dev_priv->l3_parity.which_slice))
		goto out;

	misccpctl = I915_READ(GEN7_MISCCPCTL);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	POSTING_READ(GEN7_MISCCPCTL);

	error_status = I915_READ(GEN7_L3CDERRST1);
	row = GEN7_PARITY_ERROR_ROW(error_status);
	bank = GEN7_PARITY_ERROR_BANK(error_status);
	subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
	while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
		u32 reg;

	I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID |
				    GEN7_L3CDERRST1_ENABLE);
	POSTING_READ(GEN7_L3CDERRST1);
		slice--;
		if (WARN_ON_ONCE(slice >= NUM_L3_SLICES(dev_priv->dev)))
			break;

	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
		dev_priv->l3_parity.which_slice &= ~(1<<slice);

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
		reg = GEN7_L3CDERRST1 + (slice * 0x200);

	mutex_unlock(&dev_priv->dev->struct_mutex);
		error_status = I915_READ(reg);
		row = GEN7_PARITY_ERROR_ROW(error_status);
		bank = GEN7_PARITY_ERROR_BANK(error_status);
		subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);

		I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE);
		POSTING_READ(reg);

		parity_event[0] = I915_L3_PARITY_UEVENT "=1";
		parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
		parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
		parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
	parity_event[4] = NULL;
		parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice);
		parity_event[5] = NULL;

		kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
				   KOBJ_CHANGE, parity_event);

	DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n",
		  row, bank, subbank);
		DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n",
			  slice, row, bank, subbank);

		kfree(parity_event[4]);
		kfree(parity_event[3]);
		kfree(parity_event[2]);
		kfree(parity_event[1]);
	}

static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
	I915_WRITE(GEN7_MISCCPCTL, misccpctl);

out:
	WARN_ON(dev_priv->l3_parity.which_slice);
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv->dev));
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	mutex_unlock(&dev_priv->dev->struct_mutex);
}

static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir)
{
	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;

@@ -944,9 +964,16 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
		return;

	spin_lock(&dev_priv->irq_lock);
	ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
	ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR(dev));
	spin_unlock(&dev_priv->irq_lock);

	iir &= GT_PARITY_ERROR(dev);
	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1)
		dev_priv->l3_parity.which_slice |= 1 << 1;

	if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
		dev_priv->l3_parity.which_slice |= 1 << 0;

	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
}

@@ -981,8 +1008,8 @@ static void snb_gt_irq_handler(struct drm_device *dev,
		i915_handle_error(dev, false);
	}

	if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
		ivybridge_parity_error_irq_handler(dev);
	if (gt_iir & GT_PARITY_ERROR(dev))
		ivybridge_parity_error_irq_handler(dev, gt_iir);
}

#define HPD_STORM_DETECT_PERIOD 1000
@@ -2221,8 +2248,8 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
	dev_priv->gt_irq_mask = ~0;
	if (HAS_L3_GPU_CACHE(dev)) {
		/* L3 parity interrupt is always unmasked. */
		dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
		gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
		dev_priv->gt_irq_mask = ~GT_PARITY_ERROR(dev);
		gt_irqs |= GT_PARITY_ERROR(dev);
	}

	gt_irqs |= GT_RENDER_USER_INTERRUPT;
+7 −0
Original line number Diff line number Diff line
@@ -927,6 +927,7 @@
#define GT_BLT_USER_INTERRUPT			(1 << 22)
#define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
#define GT_BSD_USER_INTERRUPT			(1 << 12)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
#define GT_RENDER_CS_MASTER_ERROR_INTERRUPT	(1 <<  3)
@@ -937,6 +938,10 @@
#define PM_VEBOX_CS_ERROR_INTERRUPT		(1 << 12) /* hsw+ */
#define PM_VEBOX_USER_INTERRUPT			(1 << 10) /* hsw+ */

#define GT_PARITY_ERROR(dev) \
	(GT_RENDER_L3_PARITY_ERROR_INTERRUPT | \
	 IS_HASWELL(dev) ? GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 : 0)

/* These are all the "old" interrupts */
#define ILK_BSD_USER_INTERRUPT				(1<<5)
#define I915_PIPE_CONTROL_NOTIFY_INTERRUPT		(1<<18)
@@ -4747,6 +4752,7 @@

/* IVYBRIDGE DPF */
#define GEN7_L3CDERRST1			0xB008 /* L3CD Error Status 1 */
#define HSW_L3CDERRST11			0xB208 /* L3CD Error Status register 1 slice 1 */
#define   GEN7_L3CDERRST1_ROW_MASK	(0x7ff<<14)
#define   GEN7_PARITY_ERROR_VALID	(1<<13)
#define   GEN7_L3CDERRST1_BANK_MASK	(3<<11)
@@ -4760,6 +4766,7 @@
#define   GEN7_L3CDERRST1_ENABLE	(1<<7)

#define GEN7_L3LOG_BASE			0xB070
#define HSW_L3LOG_BASE_SLICE1		0xB270
#define GEN7_L3LOG_SIZE			0x80

#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
+27 −7
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
	struct drm_device *drm_dev = dminor->dev;
	struct drm_i915_private *dev_priv = drm_dev->dev_private;
	uint32_t misccpctl;
	int slice = (int)(uintptr_t)attr->private;
	int i, ret;

	count = round_down(count, 4);
@@ -134,9 +135,9 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
		return ret;

	if (IS_HASWELL(drm_dev)) {
		if (dev_priv->l3_parity.remap_info)
		if (dev_priv->l3_parity.remap_info[slice])
			memcpy(buf,
			       dev_priv->l3_parity.remap_info + (offset/4),
			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
			       count);
		else
			memset(buf, 0, count);
@@ -168,6 +169,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
	struct drm_device *drm_dev = dminor->dev;
	struct drm_i915_private *dev_priv = drm_dev->dev_private;
	u32 *temp = NULL; /* Just here to make handling failures easy */
	int slice = (int)(uintptr_t)attr->private;
	int ret;

	ret = l3_access_valid(drm_dev, offset);
@@ -178,7 +180,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
	if (ret)
		return ret;

	if (!dev_priv->l3_parity.remap_info) {
	if (!dev_priv->l3_parity.remap_info[slice]) {
		temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
		if (!temp) {
			mutex_unlock(&drm_dev->struct_mutex);
@@ -198,11 +200,11 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
	 * at this point it is left as a TODO.
	*/
	if (temp)
		dev_priv->l3_parity.remap_info = temp;
		dev_priv->l3_parity.remap_info[slice] = temp;

	memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf, count);
	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);

	i915_gem_l3_remap(drm_dev);
	i915_gem_l3_remap(drm_dev, slice);

	mutex_unlock(&drm_dev->struct_mutex);

@@ -214,7 +216,17 @@ static struct bin_attribute dpf_attrs = {
	.size = GEN7_L3LOG_SIZE,
	.read = i915_l3_read,
	.write = i915_l3_write,
	.mmap = NULL
	.mmap = NULL,
	.private = (void *)0
};

static struct bin_attribute dpf_attrs_1 = {
	.attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)},
	.size = GEN7_L3LOG_SIZE,
	.read = i915_l3_read,
	.write = i915_l3_write,
	.mmap = NULL,
	.private = (void *)1
};

static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
@@ -525,6 +537,13 @@ void i915_setup_sysfs(struct drm_device *dev)
		ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs);
		if (ret)
			DRM_ERROR("l3 parity sysfs setup failed\n");

		if (NUM_L3_SLICES(dev) > 1) {
			ret = device_create_bin_file(&dev->primary->kdev,
						     &dpf_attrs_1);
			if (ret)
				DRM_ERROR("l3 parity slice 1 setup failed\n");
		}
	}

	ret = 0;
@@ -548,6 +567,7 @@ void i915_teardown_sysfs(struct drm_device *dev)
		sysfs_remove_files(&dev->primary->kdev.kobj, vlv_attrs);
	else
		sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs);
	device_remove_bin_file(&dev->primary->kdev,  &dpf_attrs_1);
	device_remove_bin_file(&dev->primary->kdev,  &dpf_attrs);
#ifdef CONFIG_PM
	sysfs_unmerge_group(&dev->primary->kdev.kobj, &rc6_attr_group);
Loading