[Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 remapping
Ben Widawsky
benjamin.widawsky at intel.com
Fri Sep 13 07:28:31 CEST 2013
Certain HSW SKUs have a second bank of L3. This L3 remapping has a
separate register set, and interrupt from the first "slice". A slice is
simply a term to define some subset of the GPU's l3 cache. This patch
implements both the interrupt handler, and ability to communicate with
userspace about this second slice.
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
drivers/gpu/drm/i915/i915_drv.h | 9 ++--
drivers/gpu/drm/i915/i915_gem.c | 26 ++++++----
drivers/gpu/drm/i915/i915_irq.c | 84 +++++++++++++++++++++------------
drivers/gpu/drm/i915/i915_reg.h | 6 +++
drivers/gpu/drm/i915/i915_sysfs.c | 34 ++++++++++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +--
include/uapi/drm/i915_drm.h | 8 ++--
7 files changed, 115 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81ba5bb..eb90461 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -918,9 +918,11 @@ struct i915_ums_state {
int mm_suspended;
};
+#define MAX_L3_SLICES 2
struct intel_l3_parity {
- u32 *remap_info;
+ u32 *remap_info[MAX_L3_SLICES];
struct work_struct error_work;
+ int which_slice;
};
struct i915_gem_mm {
@@ -1686,7 +1688,8 @@ struct drm_i915_file_private {
#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
-#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+#define HAS_L3_GPU_CACHE(dev) (INTEL_INFO(dev)->gen >= 7)
+#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev))
#define GT_FREQUENCY_MULTIPLIER 50
@@ -1947,7 +1950,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
-void i915_gem_l3_remap(struct drm_device *dev);
+void i915_gem_l3_remap(struct drm_device *dev, int slice);
void i915_gem_init_swizzling(struct drm_device *dev);
void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
int __must_check i915_gpu_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b510a3..b11f7d6c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4256,16 +4256,21 @@ i915_gem_idle(struct drm_device *dev)
return 0;
}
-void i915_gem_l3_remap(struct drm_device *dev)
+void i915_gem_l3_remap(struct drm_device *dev, int slice)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
+ u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
u32 misccpctl;
int i;
if (!HAS_L3_GPU_CACHE(dev))
return;
- if (!dev_priv->l3_parity.remap_info)
+ if (NUM_L3_SLICES(dev) < 2 && slice)
+ return;
+
+ if (!remap_info)
return;
misccpctl = I915_READ(GEN7_MISCCPCTL);
@@ -4273,17 +4278,17 @@ void i915_gem_l3_remap(struct drm_device *dev)
POSTING_READ(GEN7_MISCCPCTL);
for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
- u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
- if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
+ u32 remap = I915_READ(reg_base + i);
+ if (remap && remap != remap_info[i/4])
DRM_DEBUG("0x%x was already programmed to %x\n",
- GEN7_L3LOG_BASE + i, remap);
- if (remap && !dev_priv->l3_parity.remap_info[i/4])
+ reg_base + i, remap);
+ if (remap && !remap_info[i/4])
DRM_DEBUG_DRIVER("Clearing remapped register\n");
- I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
+ I915_WRITE(reg_base + i, remap_info[i/4]);
}
/* Make sure all the writes land before disabling dop clock gating */
- POSTING_READ(GEN7_L3LOG_BASE);
+ POSTING_READ(reg_base);
I915_WRITE(GEN7_MISCCPCTL, misccpctl);
}
@@ -4377,7 +4382,7 @@ int
i915_gem_init_hw(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- int ret;
+ int ret, i;
if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
return -EIO;
@@ -4396,7 +4401,8 @@ i915_gem_init_hw(struct drm_device *dev)
I915_WRITE(GEN7_MSG_CTL, temp);
}
- i915_gem_l3_remap(dev);
+ for (i = 0; i < NUM_L3_SLICES(dev); i++)
+ i915_gem_l3_remap(dev, i);
i915_gem_init_swizzling(dev);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 13d26cf..62cdf05 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -882,9 +882,10 @@ static void ivybridge_parity_work(struct work_struct *work)
drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
l3_parity.error_work);
u32 error_status, row, bank, subbank;
- char *parity_event[5];
+ char *parity_event[6];
uint32_t misccpctl;
unsigned long flags;
+ uint8_t slice = 0;
/* We must turn off DOP level clock gating to access the L3 registers.
* In order to prevent a get/put style interface, acquire struct mutex
@@ -892,45 +893,63 @@ static void ivybridge_parity_work(struct work_struct *work)
*/
mutex_lock(&dev_priv->dev->struct_mutex);
+ /* If we've screwed up tracking, just let the interrupt fire again */
+ if (WARN_ON(!dev_priv->l3_parity.which_slice))
+ goto out;
+
misccpctl = I915_READ(GEN7_MISCCPCTL);
I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
POSTING_READ(GEN7_MISCCPCTL);
- error_status = I915_READ(GEN7_L3CDERRST1);
- row = GEN7_PARITY_ERROR_ROW(error_status);
- bank = GEN7_PARITY_ERROR_BANK(error_status);
- subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
+ while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
+ u32 reg;
- I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID |
- GEN7_L3CDERRST1_ENABLE);
- POSTING_READ(GEN7_L3CDERRST1);
+ if (WARN_ON(slice >= MAX_L3_SLICES))
+ break;
- I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+ dev_priv->l3_parity.which_slice &= ~(1<<slice);
- spin_lock_irqsave(&dev_priv->irq_lock, flags);
- ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
- spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+ reg = GEN7_L3CDERRST1 + (slice * 0x200);
- mutex_unlock(&dev_priv->dev->struct_mutex);
+ error_status = I915_READ(reg);
+ row = GEN7_PARITY_ERROR_ROW(error_status);
+ bank = GEN7_PARITY_ERROR_BANK(error_status);
+ subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
+
+ I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE);
+ POSTING_READ(reg);
+
+ parity_event[0] = I915_L3_PARITY_UEVENT "=1";
+ parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
+ parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
+ parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
+ parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice);
+ parity_event[5] = NULL;
- parity_event[0] = I915_L3_PARITY_UEVENT "=1";
- parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
- parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
- parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
- parity_event[4] = NULL;
+ kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
+ KOBJ_CHANGE, parity_event);
- kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
- KOBJ_CHANGE, parity_event);
+ DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n",
+ slice, row, bank, subbank);
- DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n",
- row, bank, subbank);
+ kfree(parity_event[4]);
+ kfree(parity_event[3]);
+ kfree(parity_event[2]);
+ kfree(parity_event[1]);
+ }
+
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+out:
+ WARN_ON(dev_priv->l3_parity.which_slice);
+ spin_lock_irqsave(&dev_priv->irq_lock, flags);
+ ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR);
+ spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
- kfree(parity_event[3]);
- kfree(parity_event[2]);
- kfree(parity_event[1]);
+ mutex_unlock(&dev_priv->dev->struct_mutex);
}
-static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
+static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir)
{
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -938,9 +957,12 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
return;
spin_lock(&dev_priv->irq_lock);
- ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+ ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR);
spin_unlock(&dev_priv->irq_lock);
+ iir &= GT_PARITY_ERROR;
+ dev_priv->l3_parity.which_slice =
+ 1 << (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 ? 1 : 0);
queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
}
@@ -975,8 +997,8 @@ static void snb_gt_irq_handler(struct drm_device *dev,
i915_handle_error(dev, false);
}
- if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
- ivybridge_parity_error_irq_handler(dev);
+ if (gt_iir & GT_PARITY_ERROR)
+ ivybridge_parity_error_irq_handler(dev, gt_iir);
}
#define HPD_STORM_DETECT_PERIOD 1000
@@ -2261,8 +2283,8 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
dev_priv->gt_irq_mask = ~0;
if (HAS_L3_GPU_CACHE(dev)) {
/* L3 parity interrupt is always unmasked. */
- dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
- gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+ dev_priv->gt_irq_mask = ~GT_PARITY_ERROR;
+ gt_irqs |= GT_PARITY_ERROR;
}
gt_irqs |= GT_RENDER_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bcee89b..4155a1d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -927,6 +927,7 @@
#define GT_BLT_USER_INTERRUPT (1 << 22)
#define GT_BSD_CS_ERROR_INTERRUPT (1 << 15)
#define GT_BSD_USER_INTERRUPT (1 << 12)
+#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)
#define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3)
@@ -937,6 +938,9 @@
#define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */
#define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */
+#define GT_PARITY_ERROR (GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 | \
+ GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
+
/* These are all the "old" interrupts */
#define ILK_BSD_USER_INTERRUPT (1<<5)
#define I915_PIPE_CONTROL_NOTIFY_INTERRUPT (1<<18)
@@ -4742,6 +4746,7 @@
/* IVYBRIDGE DPF */
#define GEN7_L3CDERRST1 0xB008 /* L3CD Error Status 1 */
+#define HSW_L3CDERRST11 0xB208 /* L3CD Error Status register 1 slice 1 */
#define GEN7_L3CDERRST1_ROW_MASK (0x7ff<<14)
#define GEN7_PARITY_ERROR_VALID (1<<13)
#define GEN7_L3CDERRST1_BANK_MASK (3<<11)
@@ -4755,6 +4760,7 @@
#define GEN7_L3CDERRST1_ENABLE (1<<7)
#define GEN7_L3LOG_BASE 0xB070
+#define HSW_L3LOG_BASE_SLICE1 0xB270
#define GEN7_L3LOG_SIZE 0x80
#define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 43c2e81..d208f2d 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -119,6 +119,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
struct drm_device *drm_dev = dminor->dev;
struct drm_i915_private *dev_priv = drm_dev->dev_private;
uint32_t misccpctl;
+ int slice = (int)(uintptr_t)attr->private;
int i, ret;
count = round_down(count, 4);
@@ -135,11 +136,11 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
if (IS_HASWELL(drm_dev)) {
int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
- if ((!dev_priv->l3_parity.remap_info))
+ if ((!dev_priv->l3_parity.remap_info[slice]))
memset(buf + offset, 0, last - offset);
else
memcpy(buf + offset,
- dev_priv->l3_parity.remap_info + (offset/4),
+ dev_priv->l3_parity.remap_info[slice] + (offset/4),
last - offset);
i = last;
@@ -170,6 +171,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
struct drm_device *drm_dev = dminor->dev;
struct drm_i915_private *dev_priv = drm_dev->dev_private;
u32 *temp = NULL; /* Just here to make handling failures easy */
+ int slice = (int)(uintptr_t)attr->private;
int ret;
ret = l3_access_valid(drm_dev, offset);
@@ -180,7 +182,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
if (ret)
return ret;
- if (!dev_priv->l3_parity.remap_info) {
+ if (!dev_priv->l3_parity.remap_info[slice]) {
temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
if (!temp) {
mutex_unlock(&drm_dev->struct_mutex);
@@ -200,11 +202,11 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
* at this point it is left as a TODO.
*/
if (temp)
- dev_priv->l3_parity.remap_info = temp;
+ dev_priv->l3_parity.remap_info[slice] = temp;
- memcpy(dev_priv->l3_parity.remap_info + (offset/4), buf, count);
+ memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
- i915_gem_l3_remap(drm_dev);
+ i915_gem_l3_remap(drm_dev, slice);
mutex_unlock(&drm_dev->struct_mutex);
@@ -216,7 +218,17 @@ static struct bin_attribute dpf_attrs = {
.size = GEN7_L3LOG_SIZE,
.read = i915_l3_read,
.write = i915_l3_write,
- .mmap = NULL
+ .mmap = NULL,
+ .private = (void *)0
+};
+
+static struct bin_attribute dpf_attrs_1 = {
+ .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)},
+ .size = GEN7_L3LOG_SIZE,
+ .read = i915_l3_read,
+ .write = i915_l3_write,
+ .mmap = NULL,
+ .private = (void *)1
};
static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
@@ -527,6 +539,13 @@ void i915_setup_sysfs(struct drm_device *dev)
ret = device_create_bin_file(&dev->primary->kdev, &dpf_attrs);
if (ret)
DRM_ERROR("l3 parity sysfs setup failed\n");
+
+ if (NUM_L3_SLICES(dev) > 1) {
+ ret = device_create_bin_file(&dev->primary->kdev,
+ &dpf_attrs_1);
+ if (ret)
+ DRM_ERROR("l3 parity slice 1 setup failed\n");
+ }
}
ret = 0;
@@ -550,6 +569,7 @@ void i915_teardown_sysfs(struct drm_device *dev)
sysfs_remove_files(&dev->primary->kdev.kobj, vlv_attrs);
else
sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs);
+ device_remove_bin_file(&dev->primary->kdev, &dpf_attrs_1);
device_remove_bin_file(&dev->primary->kdev, &dpf_attrs);
#ifdef CONFIG_PM
sysfs_unmerge_group(&dev->primary->kdev.kobj, &rc6_attr_group);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 686e5b2..3539b45 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -570,7 +570,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
if (HAS_L3_GPU_CACHE(dev))
- I915_WRITE_IMR(ring, ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+ I915_WRITE_IMR(ring, ~GT_PARITY_ERROR);
return ret;
}
@@ -1000,7 +1000,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring,
~(ring->irq_enable_mask |
- GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
+ GT_PARITY_ERROR));
else
I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
@@ -1021,7 +1021,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
if (--ring->irq_refcount == 0) {
if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring,
- ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+ ~GT_PARITY_ERROR);
else
I915_WRITE_IMR(ring, ~0);
ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 55bb572..3a4e97b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -38,10 +38,10 @@
*
* I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
* event from the gpu l3 cache. Additional information supplied is ROW,
- * BANK, SUBBANK of the affected cacheline. Userspace should keep track of
- * these events and if a specific cache-line seems to have a persistent
- * error remap it with the l3 remapping tool supplied in intel-gpu-tools.
- * The value supplied with the event is always 1.
+ * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
+ * track of these events and if a specific cache-line seems to have a
+ * persistent error remap it with the l3 remapping tool supplied in
+ * intel-gpu-tools. The value supplied with the event is always 1.
*
* I915_ERROR_UEVENT - Generated upon error detection, currently only via
* hangcheck. The error detection event is a good indicator of when things
--
1.8.4
More information about the Intel-gfx
mailing list