[Intel-gfx] [PATCH 2/2] [v2] drm/i915: Disable GGTT PTEs on GEN6+ suspend
Todd Previte
tprevite at gmail.com
Fri Oct 18 02:20:33 CEST 2013
On 10/16/13 9:21 AM, Ben Widawsky wrote:
> Once the machine gets to a certain point in the suspend process, we
> expect the GPU to be idle. If it is not, we might corrupt memory.
> Empirically (with an early version of this patch) we have seen this is
> not the case. We cannot currently explain why the latent GPU writes
> occur.
>
> In the technical sense, this patch is a workaround in that we have an
> issue we can't explain, and the patch indirectly solves the issue.
> However, it's really better than a workaround because we understand why
> it works, and it really should be a safe thing to do in all cases.
>
> The noticeable effect other than the debug messages would be an increase
> in the suspend time. I have not measure how expensive it actually is.
>
> I think it would be good to spend further time to root cause why we're
> seeing these latent writes, but it shouldn't preclude preventing the
> fallout.
>
> NOTE: It should be safe (and makes some sense IMO) to also keep the
> VALID bit unset on resume when we clear_range(). I've opted not to do
> this as properly clearing those bits at some later point would be extra
> work.
>
> v2: Fix bugzilla link
>
> Bugzilla: http://bugs.freedesktop.org/6549://bugs.freedesktop.org/show_bug.cgi?id=65496
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59321
> Tested-by: Takashi Iwai <tiwai at suse.de>
> Tested-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
Tested-By: Todd Previte <tprevite at gmail.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 5 ++-
> drivers/gpu/drm/i915/i915_drv.h | 5 ++-
> drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++++++++++++++++++-----
> drivers/gpu/drm/i915/i915_reg.h | 4 ++
> 4 files changed, 78 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 59649c0..ea90c5f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -502,6 +502,8 @@ static int i915_drm_freeze(struct drm_device *dev)
> intel_modeset_suspend_hw(dev);
> }
>
> + i915_gem_suspend_gtt_mappings(dev);
> +
> i915_save_state(dev);
>
> intel_opregion_fini(dev);
> @@ -587,7 +589,8 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings)
> mutex_lock(&dev->struct_mutex);
> i915_gem_restore_gtt_mappings(dev);
> mutex_unlock(&dev->struct_mutex);
> - }
> + } else if (drm_core_check_feature(dev, DRIVER_MODESET))
> + i915_check_and_clear_faults(dev);
>
> intel_init_power_well(dev);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0cbeb0e..7ca99350 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -521,7 +521,8 @@ struct i915_address_space {
> bool valid); /* Create a valid PTE */
> void (*clear_range)(struct i915_address_space *vm,
> unsigned int first_entry,
> - unsigned int num_entries);
> + unsigned int num_entries,
> + bool use_scratch);
> void (*insert_entries)(struct i915_address_space *vm,
> struct sg_table *st,
> unsigned int first_entry,
> @@ -2143,6 +2144,8 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_object *obj);
>
> +void i915_check_and_clear_faults(struct drm_device *dev);
> +void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
> void i915_gem_restore_gtt_mappings(struct drm_device *dev);
> int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
> void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 81dce29..c4c42e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -241,7 +241,8 @@ static int gen6_ppgtt_enable(struct drm_device *dev)
> /* PPGTT support for Sandybdrige/Gen6 and later */
> static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> unsigned first_entry,
> - unsigned num_entries)
> + unsigned num_entries,
> + bool use_scratch)
> {
> struct i915_hw_ppgtt *ppgtt =
> container_of(vm, struct i915_hw_ppgtt, base);
> @@ -372,7 +373,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> }
>
> ppgtt->base.clear_range(&ppgtt->base, 0,
> - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);
> + ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
>
> ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
>
> @@ -449,7 +450,8 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> {
> ppgtt->base.clear_range(&ppgtt->base,
> i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
> - obj->base.size >> PAGE_SHIFT);
> + obj->base.size >> PAGE_SHIFT,
> + true);
> }
>
> extern int intel_iommu_gfx_mapped;
> @@ -490,15 +492,65 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
> dev_priv->mm.interruptible = interruptible;
> }
>
> +void i915_check_and_clear_faults(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_ring_buffer *ring;
> + int i;
> +
> + if (INTEL_INFO(dev)->gen < 6)
> + return;
> +
> + for_each_ring(ring, dev_priv, i) {
> + u32 fault_reg;
> + fault_reg = I915_READ(RING_FAULT_REG(ring));
> + if (fault_reg & RING_FAULT_VALID) {
> + DRM_DEBUG_DRIVER("Unexpected fault\n"
> + "\tAddr: 0x%08lx\\n"
> + "\tAddress space: %s\n"
> + "\tSource ID: %d\n"
> + "\tType: %d\n",
> + fault_reg & PAGE_MASK,
> + fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
> + RING_FAULT_SRCID(fault_reg),
> + RING_FAULT_FAULT_TYPE(fault_reg));
> + I915_WRITE(RING_FAULT_REG(ring),
> + fault_reg & ~RING_FAULT_VALID);
> + }
> + }
> + POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
> +}
> +
> +void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + /* Don't bother messing with faults pre GEN6 as we have little
> + * documentation supporting that it's a good idea.
> + */
> + if (INTEL_INFO(dev)->gen < 6)
> + return;
> +
> + i915_check_and_clear_faults(dev);
> +
> + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> + dev_priv->gtt.base.start / PAGE_SIZE,
> + dev_priv->gtt.base.total / PAGE_SIZE,
> + false);
> +}
> +
> void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_object *obj;
>
> + i915_check_and_clear_faults(dev);
> +
> /* First fill our portion of the GTT with scratch pages */
> dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> dev_priv->gtt.base.start / PAGE_SIZE,
> - dev_priv->gtt.base.total / PAGE_SIZE);
> + dev_priv->gtt.base.total / PAGE_SIZE,
> + true);
>
> list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> i915_gem_clflush_object(obj, obj->pin_display);
> @@ -565,7 +617,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>
> static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> unsigned int first_entry,
> - unsigned int num_entries)
> + unsigned int num_entries,
> + bool use_scratch)
> {
> struct drm_i915_private *dev_priv = vm->dev->dev_private;
> gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
> @@ -578,7 +631,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> first_entry, num_entries, max_entries))
> num_entries = max_entries;
>
> - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
> +
> for (i = 0; i < num_entries; i++)
> iowrite32(scratch_pte, >t_base[i]);
> readl(gtt_base);
> @@ -599,7 +653,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
>
> static void i915_ggtt_clear_range(struct i915_address_space *vm,
> unsigned int first_entry,
> - unsigned int num_entries)
> + unsigned int num_entries,
> + bool unused)
> {
> intel_gtt_clear_range(first_entry, num_entries);
> }
> @@ -627,7 +682,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
>
> dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> entry,
> - obj->base.size >> PAGE_SHIFT);
> + obj->base.size >> PAGE_SHIFT,
> + true);
>
> obj->has_global_gtt_mapping = 0;
> }
> @@ -714,11 +770,11 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
> const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
> DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> hole_start, hole_end);
> - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count);
> + ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
> }
>
> /* And finally clear the reserved guard page */
> - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1);
> + ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
> }
>
> static bool
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 13153c3..f98f584 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -656,6 +656,10 @@
> #define ARB_MODE_SWIZZLE_IVB (1<<5)
> #define RENDER_HWS_PGA_GEN7 (0x04080)
> #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id)
> +#define RING_FAULT_GTTSEL_MASK (1<<11)
> +#define RING_FAULT_SRCID(x) ((x >> 3) & 0xff)
> +#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
> +#define RING_FAULT_VALID (1<<0)
> #define DONE_REG 0x40b0
> #define BSD_HWS_PGA_GEN7 (0x04180)
> #define BLT_HWS_PGA_GEN7 (0x04280)
More information about the Intel-gfx
mailing list