[Intel-gfx] [PATCH] drm/i915: GFDT support for SNB/IVB
Daniel Vetter
daniel at ffwll.ch
Sun Mar 3 17:28:52 CET 2013
On Fri, Mar 01, 2013 at 08:32:57PM +0200, ville.syrjala at linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> Currently all scanout buffers must be uncached because the
> display controller doesn't snoop the LLC. SNB introduced another
> method to guarantee coherency for the display controller. It's
> called the GFDT or graphics data type.
>
> Pages that have the GFDT bit enabled in their PTEs get flushed
> all the way to memory when a MI_FLUSH_DW or PIPE_CONTROL is
> issued with the "synchronize GFDT" bit set.
>
> So rather than making all scanout buffers uncached, set the GFDT
> bit in their PTEs, and modify the ring flush functions to enable
> the "synchronize GFDT" bit.
>
> On HSW the GFDT bit was removed from the PTE, and it's only present in
> surface state, so we can't really set it from the kernel. Also the docs
> state that the hardware isn't actually guaranteed to respect the GFDT
> bit. So it looks like GFDT isn't all that useful on HSW.
>
> So far I've tried this very quickly on an IVB machine, and
> it seems to be working as advertised. No idea if it does any
> good though.
>
> TODO:
> - make sure there are no missing flushes (CPU access doesn't
> respect GFDT after all).
> - measure it and see if there's some real benefit
> - maybe we can track whether "synchronize GFDT" needs to be
> issued, and skip it when possible. needs some numbers to
> determine if it's worthwhile.
>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
Iirc when I tried this out a while back it regressed a few benchmarks.
Chris and I suspected cache thrashing, but it's hard to tell without proper
instrumentation support. Chris played around with a few tricks to mark
other giant bos as uncacheable, but he couldn't find any workloads that
improved.
In short, I think this needs to come with a decent amount of numbers
attached, like the TODO says ;-)
The other thing was that I didn't manage to get things to work properly,
leaving some random cacheline dirt on the screen. Looking at your code, you
add the gfdt flush to every ring_flush, whereas I tried to be clever
and only flushed after batches rendering to the frontbuffer (roughly the
sketch below). So it was probably a bug in my code, or a flush on a given
ring doesn't flush out caches for one of the other engines.
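For illustration, the targeted variant I mean would look roughly like
this (a minimal sketch only -- maybe_sync_gfdt, gen6_emit_pipe_control
and the frontbuffer_dirty tracking are invented names, not code from
either patch):

	/* Emit the GFDT synchronize only after a batch that actually
	 * touched a GFDT-mapped buffer, instead of on every ring flush.
	 */
	static void maybe_sync_gfdt(struct intel_ring_buffer *ring)
	{
		if (!HAS_GFDT(ring->dev) || !ring->frontbuffer_dirty)
			return;

		/* render ring only; the other engines would need the
		 * equivalent MI_FLUSH_DW with "synchronize GFDT" set */
		gen6_emit_pipe_control(ring, PIPE_CONTROL_QW_WRITE |
				       PIPE_CONTROL_SYNCHRONIZE_GFDT);

		ring->frontbuffer_dirty = false;
	}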
And finally a bikeshed: I'd vote for a slightly more generic unsigned
long cache_flags instead of bool gfdt, along the lines of the sketch below.
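Something like this (again just a sketch, the flag name is invented):

	/* generic per-bo PTE attribute flags instead of a dedicated bool */
	#define I915_CACHE_FLAG_GFDT	(1UL << 0)

	void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
				      enum i915_cache_level cache_level,
				      unsigned long cache_flags);

	/* callers then pass e.g. */
	i915_gem_gtt_bind_object(obj, obj->cache_level,
				 obj->gfdt ? I915_CACHE_FLAG_GFDT : 0);

That way the next PTE attribute doesn't require changing every call
site signature again.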
Cheers, Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 18 +++--
> drivers/gpu/drm/i915/i915_gem.c | 109 ++++++++++++++++++++++++-----
> drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +--
> drivers/gpu/drm/i915/i915_gem_gtt.c | 38 ++++++----
> drivers/gpu/drm/i915/i915_reg.h | 2 +
> drivers/gpu/drm/i915/intel_display.c | 4 +-
> drivers/gpu/drm/i915/intel_overlay.c | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 25 +++++++
> 9 files changed, 167 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e95337c..537b344 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -408,7 +408,8 @@ struct i915_gtt {
> void (*gtt_insert_entries)(struct drm_device *dev,
> struct sg_table *st,
> unsigned int pg_start,
> - enum i915_cache_level cache_level);
> + enum i915_cache_level cache_level,
> + bool gfdt);
> };
> #define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
>
> @@ -429,7 +430,8 @@ struct i915_hw_ppgtt {
> void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
> struct sg_table *st,
> unsigned int pg_start,
> - enum i915_cache_level cache_level);
> + enum i915_cache_level cache_level,
> + bool gfdt);
> void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
> };
>
> @@ -1169,6 +1171,7 @@ struct drm_i915_gem_object {
> unsigned int fenced_gpu_access:1;
>
> unsigned int cache_level:2;
> + unsigned int gfdt:1;
>
> unsigned int has_aliasing_ppgtt_mapping:1;
> unsigned int has_global_gtt_mapping:1;
> @@ -1310,6 +1313,10 @@ struct drm_i915_file_private {
> #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc)
> #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
>
> +/* Only SNB and IVB have GFDT in PTEs */
> +#define HAS_GFDT(dev) (HAS_LLC(dev) && !IS_HASWELL(dev) && \
> + (IS_GEN6(dev) || IS_GEN7(dev)))
> +
> #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6)
> #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >=6 && !IS_VALLEYVIEW(dev))
>
> @@ -1639,7 +1646,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> int __must_check
> i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
> int __must_check
> -i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> +i915_gem_object_pin_to_display_plane(struct drm_device *dev,
> + struct drm_i915_gem_object *obj,
> u32 alignment,
> struct intel_ring_buffer *pipelined);
> int i915_gem_attach_phys_object(struct drm_device *dev,
> @@ -1681,14 +1689,14 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
> void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_object *obj,
> - enum i915_cache_level cache_level);
> + enum i915_cache_level cache_level, bool gfdt);
> void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_object *obj);
>
> void i915_gem_restore_gtt_mappings(struct drm_device *dev);
> int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
> void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> - enum i915_cache_level cache_level);
> + enum i915_cache_level cache_level, bool gfdt);
> void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
> void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
> void i915_gem_init_global_gtt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8413ffc..c635430 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3035,8 +3035,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
> * and flushes when moving into and out of the RENDER domain, correct
> * snooping behaviour occurs naturally as the result of our domain
> * tracking.
> + *
> + * Also flush all pinned objects with GFDT enabled. Such objects
> + * may be used for scanout, and the CPU doesn't know anything
> + * about GFDT.
> */
> - if (obj->cache_level != I915_CACHE_NONE)
> + if (obj->cache_level != I915_CACHE_NONE &&
> + !(obj->gfdt && obj->pin_count))
> return;
>
> trace_i915_gem_object_clflush(obj);
> @@ -3154,6 +3159,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> struct drm_device *dev = obj->base.dev;
> drm_i915_private_t *dev_priv = dev->dev_private;
> int ret;
> + bool gfdt = obj->gfdt;
> +
> + /* Clear GFDT when moving to uncached */
> + if (cache_level == I915_CACHE_NONE)
> + gfdt = false;
>
> if (obj->cache_level == cache_level)
> return 0;
> @@ -3187,10 +3197,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> }
>
> if (obj->has_global_gtt_mapping)
> - i915_gem_gtt_bind_object(obj, cache_level);
> + i915_gem_gtt_bind_object(obj, cache_level, gfdt);
> if (obj->has_aliasing_ppgtt_mapping)
> i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> - obj, cache_level);
> + obj, cache_level, gfdt);
>
> obj->gtt_space->color = cache_level;
> }
> @@ -3219,6 +3229,65 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> }
>
> obj->cache_level = cache_level;
> + obj->gfdt = gfdt;
> + i915_gem_verify_gtt(dev);
> + return 0;
> +}
> +
> +static int i915_gem_object_set_gfdt(struct drm_i915_gem_object *obj,
> + bool gfdt)
> +{
> + struct drm_device *dev = obj->base.dev;
> + drm_i915_private_t *dev_priv = dev->dev_private;
> + int ret;
> +
> + if (!HAS_GFDT(dev))
> + return -ENODEV;
> +
> + if (obj->gfdt == gfdt)
> + return 0;
> +
> + /* no point in setting GFDT on uncached object */
> + if (obj->cache_level == I915_CACHE_NONE)
> + return -EINVAL;
> +
> + if (obj->gtt_space) {
> + ret = i915_gem_object_finish_gpu(obj);
> + if (ret)
> + return ret;
> +
> + i915_gem_object_finish_gtt(obj);
> +
> + if (obj->has_global_gtt_mapping)
> + i915_gem_gtt_bind_object(obj, obj->cache_level, gfdt);
> + if (obj->has_aliasing_ppgtt_mapping)
> + i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> + obj, obj->cache_level, gfdt);
> + }
> +
> + if (gfdt) {
> + u32 old_read_domains, old_write_domain;
> +
> + /*
> + * Since we're just now enabling GFDT there's most
> + * likely dirty data in the LLC. It must be flushed
> + * to memory the old fashioned way.
> + */
> + WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> + WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
> +
> + old_read_domains = obj->base.read_domains;
> + old_write_domain = obj->base.write_domain;
> +
> + obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> + obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +
> + trace_i915_gem_object_change_domain(obj,
> + old_read_domains,
> + old_write_domain);
> + }
> +
> + obj->gfdt = gfdt;
> i915_gem_verify_gtt(dev);
> return 0;
> }
> @@ -3291,7 +3360,8 @@ unlock:
> * any flushes to be pipelined (for pageflips).
> */
> int
> -i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> +i915_gem_object_pin_to_display_plane(struct drm_device *dev,
> + struct drm_i915_gem_object *obj,
> u32 alignment,
> struct intel_ring_buffer *pipelined)
> {
> @@ -3304,18 +3374,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> return ret;
> }
>
> - /* The display engine is not coherent with the LLC cache on gen6. As
> - * a result, we make sure that the pinning that is about to occur is
> - * done with uncached PTEs. This is lowest common denominator for all
> - * chipsets.
> - *
> - * However for gen6+, we could do better by using the GFDT bit instead
> - * of uncaching, which would allow us to flush all the LLC-cached data
> - * with that bit in the PTE to main memory with just one PIPE_CONTROL.
> + /*
> + * Try to set the GFDT bit instead of uncaching. This allows us to flush
> + * all the LLC-cached data with that bit in the PTE to main memory with
> + * just one PIPE_CONTROL.
> */
> - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> - if (ret)
> - return ret;
> + ret = i915_gem_object_set_gfdt(obj, true);
> + if (ret) {
> + /*
> + * The display engine is not coherent with the LLC cache on gen6. As
> + * a result, we make sure that the pinning that is about to occur is
> + * done with uncached PTEs. This is lowest common denominator for all
> + * chipsets.
> + */
> + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> + if (ret)
> + return ret;
> + }
>
> /* As the user may map the buffer once pinned in the display plane
> * (e.g. libkms for the bootup splash), we have to ensure that we
> @@ -3499,11 +3574,11 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
> return ret;
>
> if (!dev_priv->mm.aliasing_ppgtt)
> - i915_gem_gtt_bind_object(obj, obj->cache_level);
> + i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
> }
>
> if (!obj->has_global_gtt_mapping && map_and_fenceable)
> - i915_gem_gtt_bind_object(obj, obj->cache_level);
> + i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>
> obj->pin_count++;
> obj->pin_mappable |= map_and_fenceable;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 21177d9..7244c0c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -382,7 +382,8 @@ static int do_switch(struct i915_hw_context *to)
> }
>
> if (!to->obj->has_global_gtt_mapping)
> - i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
> + i915_gem_gtt_bind_object(to->obj, to->obj->cache_level,
> + to->obj->gfdt);
>
> if (!to->is_initialized || is_default_context(to))
> hw_flags |= MI_RESTORE_INHIBIT;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2f2daeb..28a3c42 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -197,7 +197,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
> !target_i915_obj->has_global_gtt_mapping)) {
> i915_gem_gtt_bind_object(target_i915_obj,
> - target_i915_obj->cache_level);
> + target_i915_obj->cache_level,
> + target_i915_obj->gfdt);
> }
>
> /* Validate that the target is in a valid r/w GPU domain */
> @@ -432,7 +433,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> /* Ensure ppgtt mapping exists if needed */
> if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> - obj, obj->cache_level);
> + obj, obj->cache_level, obj->gfdt);
>
> obj->has_aliasing_ppgtt_mapping = 1;
> }
> @@ -449,7 +450,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
>
> if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
> !obj->has_global_gtt_mapping)
> - i915_gem_gtt_bind_object(obj, obj->cache_level);
> + i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>
> return 0;
> }
> @@ -1011,7 +1012,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> * hsw should have this fixed, but let's be paranoid and do it
> * unconditionally for now. */
> if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
> - i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
> + i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level,
> + batch_obj->gfdt);
>
> ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
> if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 926a1e2..3c7e48b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -42,15 +42,20 @@ typedef uint32_t gtt_pte_t;
> #define HSW_PTE_UNCACHED (0)
> #define GEN6_PTE_CACHE_LLC (2 << 1)
> #define GEN6_PTE_CACHE_LLC_MLC (3 << 1)
> +#define GEN6_PTE_GFDT (1 << 3)
> #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
>
> static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
> dma_addr_t addr,
> - enum i915_cache_level level)
> + enum i915_cache_level level,
> + bool gfdt)
> {
> gtt_pte_t pte = GEN6_PTE_VALID;
> pte |= GEN6_PTE_ADDR_ENCODE(addr);
>
> + if (gfdt && HAS_GFDT(dev))
> + pte |= GEN6_PTE_GFDT;
> +
> switch (level) {
> case I915_CACHE_LLC_MLC:
> /* Haswell doesn't set L3 this way */
> @@ -89,7 +94,7 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
>
> scratch_pte = gen6_pte_encode(ppgtt->dev,
> ppgtt->scratch_page_dma_addr,
> - I915_CACHE_LLC);
> + I915_CACHE_LLC, false);
>
> while (num_entries) {
> last_pte = first_pte + num_entries;
> @@ -112,7 +117,8 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
> static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
> struct sg_table *pages,
> unsigned first_entry,
> - enum i915_cache_level cache_level)
> + enum i915_cache_level cache_level,
> + bool gfdt)
> {
> gtt_pte_t *pt_vaddr;
> unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> @@ -133,7 +139,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
> for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
> page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
> - cache_level);
> + cache_level, gfdt);
>
> /* grab the next page */
> if (++m == segment_len) {
> @@ -279,11 +285,12 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>
> void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_object *obj,
> - enum i915_cache_level cache_level)
> + enum i915_cache_level cache_level,
> + bool gfdt)
> {
> ppgtt->insert_entries(ppgtt, obj->pages,
> obj->gtt_space->start >> PAGE_SHIFT,
> - cache_level);
> + cache_level, gfdt);
> }
>
> void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> @@ -401,7 +408,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>
> list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
> i915_gem_clflush_object(obj);
> - i915_gem_gtt_bind_object(obj, obj->cache_level);
> + i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
> }
>
> i915_gem_chipset_flush(dev);
> @@ -429,7 +436,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> static void gen6_ggtt_insert_entries(struct drm_device *dev,
> struct sg_table *st,
> unsigned int first_entry,
> - enum i915_cache_level level)
> + enum i915_cache_level level,
> + bool gfdt)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct scatterlist *sg = st->sgl;
> @@ -443,7 +451,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
> len = sg_dma_len(sg) >> PAGE_SHIFT;
> for (m = 0; m < len; m++) {
> addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> - iowrite32(gen6_pte_encode(dev, addr, level),
> + iowrite32(gen6_pte_encode(dev, addr, level, gfdt),
> &gtt_entries[i]);
> i++;
> }
> @@ -457,7 +465,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
> */
> if (i != 0)
> WARN_ON(readl(&gtt_entries[i-1])
> - != gen6_pte_encode(dev, addr, level));
> + != gen6_pte_encode(dev, addr, level, gfdt));
>
> /* This next bit makes the above posting read even more important. We
> * want to flush the TLBs only after we're certain all the PTE updates
> @@ -483,7 +491,7 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
> num_entries = max_entries;
>
> scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
> - I915_CACHE_LLC);
> + I915_CACHE_LLC, false);
> for (i = 0; i < num_entries; i++)
> iowrite32(scratch_pte, &gtt_base[i]);
> readl(gtt_base);
> @@ -493,7 +501,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
> static void i915_ggtt_insert_entries(struct drm_device *dev,
> struct sg_table *st,
> unsigned int pg_start,
> - enum i915_cache_level cache_level)
> + enum i915_cache_level cache_level,
> + bool gfdt)
> {
> unsigned int flags = (cache_level == I915_CACHE_NONE) ?
> AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
> @@ -511,14 +520,15 @@ static void i915_ggtt_clear_range(struct drm_device *dev,
>
>
> void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> - enum i915_cache_level cache_level)
> + enum i915_cache_level cache_level,
> + bool gfdt)
> {
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
> obj->gtt_space->start >> PAGE_SHIFT,
> - cache_level);
> + cache_level, gfdt);
>
> obj->has_global_gtt_mapping = 1;
> }
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index cd226c2..6aa29d5 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -242,6 +242,7 @@
> #define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
> #define MI_FLUSH_DW_STORE_INDEX (1<<21)
> #define MI_INVALIDATE_TLB (1<<18)
> +#define MI_SYNCHRONIZE_GFDT (1<<17)
> #define MI_FLUSH_DW_OP_STOREDW (1<<14)
> #define MI_INVALIDATE_BSD (1<<7)
> #define MI_FLUSH_DW_USE_GTT (1<<2)
> @@ -311,6 +312,7 @@
> #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
> #define PIPE_CONTROL_CS_STALL (1<<20)
> #define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
> +#define PIPE_CONTROL_SYNCHRONIZE_GFDT (1<<17)
> #define PIPE_CONTROL_QW_WRITE (1<<14)
> #define PIPE_CONTROL_DEPTH_STALL (1<<13)
> #define PIPE_CONTROL_WRITE_FLUSH (1<<12)
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d912306..fc99d55 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -1968,7 +1968,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
> }
>
> dev_priv->mm.interruptible = false;
> - ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
> + ret = i915_gem_object_pin_to_display_plane(dev, obj, alignment, pipelined);
> if (ret)
> goto err_interruptible;
>
> @@ -6340,7 +6340,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
> goto fail_locked;
> }
>
> - ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
> + ret = i915_gem_object_pin_to_display_plane(dev, obj, 0, NULL);
> if (ret) {
> DRM_ERROR("failed to move cursor bo into the GTT\n");
> goto fail_locked;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 67a2501..b4110a2 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -695,7 +695,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
> if (ret != 0)
> return ret;
>
> - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
> + ret = i915_gem_object_pin_to_display_plane(dev, new_bo, 0, NULL);
> if (ret != 0)
> return ret;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 1d5d613..8406247 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -234,6 +234,12 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
> * when the render cache is indeed flushed.
> */
> flags |= PIPE_CONTROL_CS_STALL;
> +
> + /* Flush GFDT out to memory */
> + if (HAS_GFDT(ring->dev)) {
> + flags |= PIPE_CONTROL_QW_WRITE;
> + flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
> + }
> }
> if (invalidate_domains) {
> flags |= PIPE_CONTROL_TLB_INVALIDATE;
> @@ -306,6 +312,13 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
> if (flush_domains) {
> flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +
> + /* Flush GFDT out to memory */
> + if (HAS_GFDT(ring->dev)) {
> + flags |= PIPE_CONTROL_QW_WRITE;
> + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> + flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
> + }
> }
> if (invalidate_domains) {
> flags |= PIPE_CONTROL_TLB_INVALIDATE;
> @@ -1557,6 +1570,12 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
> return ret;
>
> cmd = MI_FLUSH_DW;
> +
> + /* Flush GFDT out to memory */
> + if (flush & I915_GEM_GPU_DOMAINS && HAS_GFDT(ring->dev))
> + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
> + MI_SYNCHRONIZE_GFDT;
> +
> /*
> * Bspec vol 1c.5 - video engine command streamer:
> * "If ENABLED, all TLBs will be invalidated once the flush
> @@ -1629,6 +1648,12 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
> return ret;
>
> cmd = MI_FLUSH_DW;
> +
> + /* Flush GFDT out to memory */
> + if (flush & I915_GEM_DOMAIN_RENDER && HAS_GFDT(ring->dev))
> + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
> + MI_SYNCHRONIZE_GFDT;
> +
> /*
> * Bspec vol 1c.3 - blitter engine command streamer:
> * "If ENABLED, all TLBs will be invalidated once the flush
> --
> 1.7.12.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch