[Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 remapping
Ben Widawsky
ben at bwidawsk.net
Tue Sep 17 20:45:30 CEST 2013
On Fri, Sep 13, 2013 at 12:38:01PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 12, 2013 at 10:28:31PM -0700, Ben Widawsky wrote:
> > Certain HSW SKUs have a second bank of L3. This L3 remapping has a
> > separate register set, and interrupt from the first "slice". A slice is
> > simply a term to define some subset of the GPU's l3 cache. This patch
> > implements both the interrupt handler, and ability to communicate with
> > userspace about this second slice.
> >
> > Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> > ---
> > drivers/gpu/drm/i915/i915_drv.h | 9 ++--
> > drivers/gpu/drm/i915/i915_gem.c | 26 ++++++----
> > drivers/gpu/drm/i915/i915_irq.c | 84 +++++++++++++++++++++------------
> > drivers/gpu/drm/i915/i915_reg.h | 6 +++
> > drivers/gpu/drm/i915/i915_sysfs.c | 34 ++++++++++---
> > drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +--
> > include/uapi/drm/i915_drm.h | 8 ++--
> > 7 files changed, 115 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 81ba5bb..eb90461 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -918,9 +918,11 @@ struct i915_ums_state {
> > int mm_suspended;
> > };
> >
> > +#define MAX_L3_SLICES 2
> > struct intel_l3_parity {
> > - u32 *remap_info;
> > + u32 *remap_info[MAX_L3_SLICES];
> > struct work_struct error_work;
> > + int which_slice;
> > };
> >
> > struct i915_gem_mm {
> > @@ -1686,7 +1688,8 @@ struct drm_i915_file_private {
> >
> > #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
> >
> > -#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
> > +#define HAS_L3_GPU_CACHE(dev) (INTEL_INFO(dev)->gen >= 7)
> > +#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev))
> >
> > #define GT_FREQUENCY_MULTIPLIER 50
> >
> > @@ -1947,7 +1950,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
> > int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
> > int __must_check i915_gem_init(struct drm_device *dev);
> > int __must_check i915_gem_init_hw(struct drm_device *dev);
> > -void i915_gem_l3_remap(struct drm_device *dev);
> > +void i915_gem_l3_remap(struct drm_device *dev, int slice);
> > void i915_gem_init_swizzling(struct drm_device *dev);
> > void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
> > int __must_check i915_gpu_idle(struct drm_device *dev);
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 5b510a3..b11f7d6c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4256,16 +4256,21 @@ i915_gem_idle(struct drm_device *dev)
> > return 0;
> > }
> >
> > -void i915_gem_l3_remap(struct drm_device *dev)
> > +void i915_gem_l3_remap(struct drm_device *dev, int slice)
> > {
> > drm_i915_private_t *dev_priv = dev->dev_private;
> > + u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
> > + u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
> > u32 misccpctl;
> > int i;
> >
> > if (!HAS_L3_GPU_CACHE(dev))
> > return;
> >
> > - if (!dev_priv->l3_parity.remap_info)
> > + if (NUM_L3_SLICES(dev) < 2 && slice)
> > + return;
>
> This check is redundant as we should never populate
> l3_parity.remap_info[1] when there's no second slice.
>
Got it. Smashed the early exit check together while at it.
> > +
> > + if (!remap_info)
> > return;
> >
> > misccpctl = I915_READ(GEN7_MISCCPCTL);
> > @@ -4273,17 +4278,17 @@ void i915_gem_l3_remap(struct drm_device *dev)
> > POSTING_READ(GEN7_MISCCPCTL);
> >
> > for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
> > - u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
> > - if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
> > + u32 remap = I915_READ(reg_base + i);
> > + if (remap && remap != remap_info[i/4])
> > DRM_DEBUG("0x%x was already programmed to %x\n",
> > - GEN7_L3LOG_BASE + i, remap);
> > - if (remap && !dev_priv->l3_parity.remap_info[i/4])
> > + reg_base + i, remap);
> > + if (remap && !remap_info[i/4])
> > DRM_DEBUG_DRIVER("Clearing remapped register\n");
> > - I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
> > + I915_WRITE(reg_base + i, remap_info[i/4]);
> > }
> >
> > /* Make sure all the writes land before disabling dop clock gating */
> > - POSTING_READ(GEN7_L3LOG_BASE);
> > + POSTING_READ(reg_base);
> >
> > I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > }
> > @@ -4377,7 +4382,7 @@ int
> > i915_gem_init_hw(struct drm_device *dev)
> > {
> > drm_i915_private_t *dev_priv = dev->dev_private;
> > - int ret;
> > + int ret, i;
> >
> > if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
> > return -EIO;
> > @@ -4396,7 +4401,8 @@ i915_gem_init_hw(struct drm_device *dev)
> > I915_WRITE(GEN7_MSG_CTL, temp);
> > }
> >
> > - i915_gem_l3_remap(dev);
> > + for (i = 0; i < NUM_L3_SLICES(dev); i++)
> > + i915_gem_l3_remap(dev, i);
> >
> > i915_gem_init_swizzling(dev);
> >
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 13d26cf..62cdf05 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -882,9 +882,10 @@ static void ivybridge_parity_work(struct work_struct *work)
> > drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> > l3_parity.error_work);
> > u32 error_status, row, bank, subbank;
> > - char *parity_event[5];
> > + char *parity_event[6];
> > uint32_t misccpctl;
> > unsigned long flags;
> > + uint8_t slice = 0;
> >
> > /* We must turn off DOP level clock gating to access the L3 registers.
> > * In order to prevent a get/put style interface, acquire struct mutex
> > @@ -892,45 +893,63 @@ static void ivybridge_parity_work(struct work_struct *work)
> > */
> > mutex_lock(&dev_priv->dev->struct_mutex);
> >
> > + /* If we've screwed up tracking, just let the interrupt fire again */
> > + if (WARN_ON(!dev_priv->l3_parity.which_slice))
> > + goto out;
> > +
> > misccpctl = I915_READ(GEN7_MISCCPCTL);
> > I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > POSTING_READ(GEN7_MISCCPCTL);
> >
> > - error_status = I915_READ(GEN7_L3CDERRST1);
> > - row = GEN7_PARITY_ERROR_ROW(error_status);
> > - bank = GEN7_PARITY_ERROR_BANK(error_status);
> > - subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
> > + while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
> > + u32 reg;
> >
> > - I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID |
> > - GEN7_L3CDERRST1_ENABLE);
> > - POSTING_READ(GEN7_L3CDERRST1);
> > + if (WARN_ON(slice >= MAX_L3_SLICES))
> > + break;
>
> Could be >= NUM_L3_SLICES(dev) for a bit of extra paranoia. Also we
> would fail to clear invalid bits from which_slice in this case, and
> thus we'd get the WARN every time the work runs. But I guess this
> should never happen in any case so probably not worth worrying about
> this too much.
Not worth worrying, but I didn't mean to be so noisy. I've fixed this
with WARN_ON_ONCE.
>
> >
> > - I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > + dev_priv->l3_parity.which_slice &= ~(1<<slice);
> >
> > - spin_lock_irqsave(&dev_priv->irq_lock, flags);
> > - ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
> > - spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
> > + reg = GEN7_L3CDERRST1 + (slice * 0x200);
> >
> > - mutex_unlock(&dev_priv->dev->struct_mutex);
> > + error_status = I915_READ(reg);
> > + row = GEN7_PARITY_ERROR_ROW(error_status);
> > + bank = GEN7_PARITY_ERROR_BANK(error_status);
> > + subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
> > +
> > + I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | GEN7_L3CDERRST1_ENABLE);
> > + POSTING_READ(reg);
> > +
> > + parity_event[0] = I915_L3_PARITY_UEVENT "=1";
> > + parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
> > + parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
> > + parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
> > + parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", slice);
> > + parity_event[5] = NULL;
> >
> > - parity_event[0] = I915_L3_PARITY_UEVENT "=1";
> > - parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
> > - parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
> > - parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
> > - parity_event[4] = NULL;
> > + kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
> > + KOBJ_CHANGE, parity_event);
> >
> > - kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
> > - KOBJ_CHANGE, parity_event);
> > + DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = %d, Sub bank = %d.\n",
> > + slice, row, bank, subbank);
> >
> > - DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n",
> > - row, bank, subbank);
> > + kfree(parity_event[4]);
> > + kfree(parity_event[3]);
> > + kfree(parity_event[2]);
> > + kfree(parity_event[1]);
> > + }
> > +
> > + I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +
> > +out:
> > + WARN_ON(dev_priv->l3_parity.which_slice);
>
> First I figured the irq could rearm this behind our back, but we disable
> the irq until the work is done. So yeah, this is fine.
>
> > + spin_lock_irqsave(&dev_priv->irq_lock, flags);
> > + ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR);
>
> Is it actually safe to enable the second slice irq when there's no second
> slice? This docs say it's just "reserved", but no mention whether it RO or
> could there be side effects.
Tests on my machine appear to work. But I don't know for certain. Bryan,
could you answer this?
>
> > + spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
> >
> > - kfree(parity_event[3]);
> > - kfree(parity_event[2]);
> > - kfree(parity_event[1]);
> > + mutex_unlock(&dev_priv->dev->struct_mutex);
> > }
> >
> > -static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
> > +static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir)
> > {
> > drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
> >
> > @@ -938,9 +957,12 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
> > return;
> >
> > spin_lock(&dev_priv->irq_lock);
> > - ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
> > + ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR);
> > spin_unlock(&dev_priv->irq_lock);
> >
> > + iir &= GT_PARITY_ERROR;
> > + dev_priv->l3_parity.which_slice =
> > + 1 << (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 ? 1 : 0);
>
> What if both slices report an error at the same time?
I was thinking that such an event can not occur, but on rethinking it
you are right that it's possible. I really hope this never happens, but
it's fixed. Anyway, it should have been |=, not =
[snip]
I'll resend the patch after Bryan answers the question about both
interrupts.
--
Ben Widawsky, Intel Open Source Technology Center
More information about the Intel-gfx
mailing list