[Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 remapping

Ben Widawsky benjamin.widawsky at intel.com
Fri Sep 13 07:28:31 CEST 2013


Certain HSW SKUs have a second bank of L3. This L3 remapping has a
separate register set, and interrupt from the first "slice". A slice is
simply a term to define some subset of the GPU's l3 cache. This patch
implements both the interrupt handler, and ability to communicate with
userspace about this second slice.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |  9 ++--
 drivers/gpu/drm/i915/i915_gem.c         | 26 ++++++----
 drivers/gpu/drm/i915/i915_irq.c         | 84 +++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_reg.h         |  6 +++
 drivers/gpu/drm/i915/i915_sysfs.c       | 34 ++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 +--
 include/uapi/drm/i915_drm.h             |  8 ++--
 7 files changed, 115 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81ba5bb..eb90461 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -918,9 +918,11 @@ struct i915_ums_state {
 	int mm_suspended;
 };
 
+#define MAX_L3_SLICES 2
 struct intel_l3_parity {
-	u32 *remap_info;
+	u32 *remap_info[MAX_L3_SLICES];
 	struct work_struct error_work;
+	int which_slice;
 };
 
 struct i915_gem_mm {
@@ -1686,7 +1688,8 @@ struct drm_i915_file_private {
 
 #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
 
-#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+#define HAS_L3_GPU_CACHE(dev) (INTEL_INFO(dev)->gen >= 7)
+#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev))
 
 #define GT_FREQUENCY_MULTIPLIER 50
 
@@ -1947,7 +1950,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-void i915_gem_l3_remap(struct drm_device *dev);
+void i915_gem_l3_remap(struct drm_device *dev, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b510a3..b11f7d6c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4256,16 +4256,21 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
-void i915_gem_l3_remap(struct drm_device *dev)
+void i915_gem_l3_remap(struct drm_device *dev, int slice)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
+	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	u32 misccpctl;
 	int i;
 
 	if (!HAS_L3_GPU_CACHE(dev))
 		return;
 
-	if (!dev_priv->l3_parity.remap_info)
+	if (NUM_L3_SLICES(dev) < 2 && slice)
+		return;
+
+	if (!remap_info)
 		return;
 
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -4273,17 +4278,17 @@ void i915_gem_l3_remap(struct drm_device *dev)
 	POSTING_READ(GEN7_MISCCPCTL);
 
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
-		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
+		u32 remap = I915_READ(reg_base + i);
+		if (remap && remap != remap_info[i/4])
 			DRM_DEBUG("0x%x was already programmed to %x\n",
-				  GEN7_L3LOG_BASE + i, remap);
-		if (remap && !dev_priv->l3_parity.remap_info[i/4])
+				  reg_base + i, remap);
+		if (remap && !remap_info[i/4])
 			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
+		I915_WRITE(reg_base + i, remap_info[i/4]);
 	}
 
 	/* Make sure all the writes land before disabling dop clock gating */
-	POSTING_READ(GEN7_L3LOG_BASE);
+	POSTING_READ(reg_base);
 
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 }
@@ -4377,7 +4382,7 @@ int
 i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	int ret;
+	int ret, i;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4396,7 +4401,8 @@ i915_gem_init_hw(struct drm_device *dev)
 		I915_WRITE(GEN7_MSG_CTL, temp);
 	}
 
-	i915_gem_l3_remap(dev);
+	for (i = 0; i < NUM_L3_SLICES(dev); i++)
+		i915_gem_l3_remap(dev, i);
 
 	i915_gem_init_swizzling(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 13d26cf..62cdf05 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -882,9 +882,10 @@ static void ivybridge_parity_work(struct work_struct *work)
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
 						    l3_parity.error_work);
 	u32 error_status, row, bank, subbank;
-	char *parity_event[5];
+	char *parity_event[6];
 	uint32_t misccpctl;
 	unsigned long flags;
+	uint8_t slice = 0;
 
 	/* We must turn off DOP level clock gating to access the L3 registers.
 	 * In order to prevent a get/put style interface, acquire struct mutex
@@ -892,45 +893,63 @@ static void ivybridge_parity_work(struct work_struct *work)
 	 */
 	mutex_lock(&dev_priv->dev->struct_mutex);
 
+	/* If we've screwed up tracking, just let the interrupt fire again */
+	if (WARN_ON(!dev_priv->l3_parity.which_slice))
+		goto out;
+
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
 	POSTING_READ(GEN7_MISCCPCTL);
 
-	error_status = I915_READ(GEN7_L3CDERRST1);
-	row = GEN7_PARITY_ERROR_ROW(error_status);
-	bank = GEN7_PARITY_ERROR_BANK(error_status);
-	subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
+	while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
+		u32 reg;
 
-	I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID |
-				    GEN7_L3CDERRST1_ENABLE);
-	POSTING_READ(GEN7_L3CDERRST1);
+		if (WARN_ON(slice >= MAX_L3_SLICES))
+			break;
 
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+		dev_priv->l3_parity.which_slice &= ~(1<<slice);
 
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+		reg = GEN7_L3CDERRST1 + (slice * 0x200);
 
-	mutex_unlock(&dev_priv->dev->struct_mutex);
+		error_status = I915_READ(reg);
+		row = GEN7_PARITY_ERROR_ROW(error_status);
+		bank = GEN7_PARITY_ERROR_BANK(error_status);
+		subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
+
+		I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE);
+		POSTING_READ(reg);
+
+		parity_event[0] = I915_L3_PARITY_UEVENT "=1";
+		parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
+		parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
+		parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
+		parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice);
+		parity_event[5] = NULL;
 
-	parity_event[0] = I915_L3_PARITY_UEVENT "=1";
-	parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
-	parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
-	parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
-	parity_event[4] = NULL;
+		kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
+				   KOBJ_CHANGE, parity_event);
 
-	kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
-			   KOBJ_CHANGE, parity_event);
+		DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n",
+			  slice, row, bank, subbank);
 
-	DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n",
-		  row, bank, subbank);
+		kfree(parity_event[4]);
+		kfree(parity_event[3]);
+		kfree(parity_event[2]);
+		kfree(parity_event[1]);
+	}
+
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+out:
+	WARN_ON(dev_priv->l3_parity.which_slice);
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+	ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
-	kfree(parity_event[3]);
-	kfree(parity_event[2]);
-	kfree(parity_event[1]);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
-static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
+static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
@@ -938,9 +957,12 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
 		return;
 
 	spin_lock(&dev_priv->irq_lock);
-	ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+	ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR);
 	spin_unlock(&dev_priv->irq_lock);
 
+	iir &= GT_PARITY_ERROR;
+	dev_priv->l3_parity.which_slice =
+		1 << (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 ? 1 : 0);
 	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
 }
 
@@ -975,8 +997,8 @@ static void snb_gt_irq_handler(struct drm_device *dev,
 		i915_handle_error(dev, false);
 	}
 
-	if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
-		ivybridge_parity_error_irq_handler(dev);
+	if (gt_iir & GT_PARITY_ERROR)
+		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
 #define HPD_STORM_DETECT_PERIOD 1000
@@ -2261,8 +2283,8 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 	dev_priv->gt_irq_mask = ~0;
 	if (HAS_L3_GPU_CACHE(dev)) {
 		/* L3 parity interrupt is always unmasked. */
-		dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-		gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+		dev_priv->gt_irq_mask = ~GT_PARITY_ERROR;
+		gt_irqs |= GT_PARITY_ERROR;
 	}
 
 	gt_irqs |= GT_RENDER_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bcee89b..4155a1d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -927,6 +927,7 @@
 #define GT_BLT_USER_INTERRUPT			(1 << 22)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
+#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
 #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT	(1 <<  3)
@@ -937,6 +938,9 @@
 #define PM_VEBOX_CS_ERROR_INTERRUPT		(1 << 12) /* hsw+ */
 #define PM_VEBOX_USER_INTERRUPT			(1 << 10) /* hsw+ */
 
+#define GT_PARITY_ERROR				(GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 | \
+						 GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
+
 /* These are all the "old" interrupts */
 #define ILK_BSD_USER_INTERRUPT				(1<<5)
 #define I915_PIPE_CONTROL_NOTIFY_INTERRUPT		(1<<18)
@@ -4742,6 +4746,7 @@
 
 /* IVYBRIDGE DPF */
 #define GEN7_L3CDERRST1			0xB008 /* L3CD Error Status 1 */
+#define HSW_L3CDERRST11			0xB208 /* L3CD Error Status register 1 slice 1 */
 #define   GEN7_L3CDERRST1_ROW_MASK	(0x7ff<<14)
 #define   GEN7_PARITY_ERROR_VALID	(1<<13)
 #define   GEN7_L3CDERRST1_BANK_MASK	(3<<11)
@@ -4755,6 +4760,7 @@
 #define   GEN7_L3CDERRST1_ENABLE	(1<<7)
 
 #define GEN7_L3LOG_BASE			0xB070
+#define HSW_L3LOG_BASE_SLICE1		0xB270
 #define GEN7_L3LOG_SIZE			0x80
 
 #define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 43c2e81..d208f2d 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -119,6 +119,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
 	uint32_t misccpctl;
+	int slice = (int)(uintptr_t)attr->private;
 	int i, ret;
 
 	count = round_down(count, 4);
@@ -135,11 +136,11 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 
 	if (IS_HASWELL(drm_dev)) {
 		int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
-		if ((!dev_priv->l3_parity.remap_info))
+		if ((!dev_priv->l3_parity.remap_info[slice]))
 			memset(buf + offset, 0, last - offset);
 		else
 			memcpy(buf + offset,
-			       dev_priv->l3_parity.remap_info + (offset/4),
+			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
 			       last - offset);
 
 		i = last;
@@ -170,6 +171,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
 	u32 *temp = NULL; /* Just here to make handling failures easy */
+	int slice = (int)(uintptr_t)attr->private;
 	int ret;
 
 	ret = l3_access_valid(drm_dev, offset);
@@ -180,7 +182,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	if (ret)
 		return ret;
 
-	if (!dev_priv->l3_parity.remap_info) {
+	if (!dev_priv->l3_parity.remap_info[slice]) {
 		temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
 		if (!temp) {
 			mutex_unlock(&drm_dev->struct_mutex);
@@ -200,11 +202,11 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	 * at this point it is left as a TODO.
 	*/
 	if (temp)
-		dev_priv->l3_parity.remap_info = temp;
+		dev_priv->l3_parity.remap_info[slice] = temp;
 
-	memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf, count);
+	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
 
-	i915_gem_l3_remap(drm_dev);
+	i915_gem_l3_remap(drm_dev, slice);
 
 	mutex_unlock(&drm_dev->struct_mutex);
 
@@ -216,7 +218,17 @@ static struct bin_attribute dpf_attrs = {
 	.size = GEN7_L3LOG_SIZE,
 	.read = i915_l3_read,
 	.write = i915_l3_write,
-	.mmap = NULL
+	.mmap = NULL,
+	.private = (void *)0
+};
+
+static struct bin_attribute dpf_attrs_1 = {
+	.attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)},
+	.size = GEN7_L3LOG_SIZE,
+	.read = i915_l3_read,
+	.write = i915_l3_write,
+	.mmap = NULL,
+	.private = (void *)1
 };
 
 static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
@@ -527,6 +539,13 @@ void i915_setup_sysfs(struct drm_device *dev)
 		ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs);
 		if (ret)
 			DRM_ERROR("l3 parity sysfs setup failed\n");
+
+		if (NUM_L3_SLICES(dev) > 1) {
+			ret = device_create_bin_file(&dev->primary->kdev,
+						     &dpf_attrs_1);
+			if (ret)
+				DRM_ERROR("l3 parity slice 1 setup failed\n");
+		}
 	}
 
 	ret = 0;
@@ -550,6 +569,7 @@ void i915_teardown_sysfs(struct drm_device *dev)
 		sysfs_remove_files(&dev->primary->kdev.kobj, vlv_attrs);
 	else
 		sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs);
+	device_remove_bin_file(&dev->primary->kdev,  &dpf_attrs_1);
 	device_remove_bin_file(&dev->primary->kdev,  &dpf_attrs);
 #ifdef CONFIG_PM
 	sysfs_unmerge_group(&dev->primary->kdev.kobj, &rc6_attr_group);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 686e5b2..3539b45 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -570,7 +570,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
 	if (HAS_L3_GPU_CACHE(dev))
-		I915_WRITE_IMR(ring, ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR);
 
 	return ret;
 }
@@ -1000,7 +1000,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring,
 				       ~(ring->irq_enable_mask |
-					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
+					 GT_PARITY_ERROR));
 		else
 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
 		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
@@ -1021,7 +1021,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 	if (--ring->irq_refcount == 0) {
 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring,
-				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+				       ~GT_PARITY_ERROR);
 		else
 			I915_WRITE_IMR(ring, ~0);
 		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 55bb572..3a4e97b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -38,10 +38,10 @@
  *
  * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
  *	event from the gpu l3 cache. Additional information supplied is ROW,
- *	BANK, SUBBANK of the affected cacheline. Userspace should keep track of
- *	these events and if a specific cache-line seems to have a persistent
- *	error remap it with the l3 remapping tool supplied in intel-gpu-tools.
- *	The value supplied with the event is always 1.
+ *	BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
+ *	track of these events and if a specific cache-line seems to have a
+ *	persistent error remap it with the l3 remapping tool supplied in
+ *	intel-gpu-tools.  The value supplied with the event is always 1.
  *
  * I915_ERROR_UEVENT - Generated upon error detection, currently only via
  *	hangcheck. The error detection event is a good indicator of when things
-- 
1.8.4




More information about the Intel-gfx mailing list