[Intel-gfx] [PATCH] drm/i915: GFDT support for SNB/IVB

Daniel Vetter daniel at ffwll.ch
Sun Mar 3 17:28:52 CET 2013


On Fri, Mar 01, 2013 at 08:32:57PM +0200, ville.syrjala at linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> Currently all scanout buffers must be uncached because the
> display controller doesn't snoop the LLC. SNB introduced another
> method to guarantee coherency for the display controller. It's
> called the GFDT or graphics data type.
> 
> Pages that have the GFDT bit enabled in their PTEs get flushed
> all the way to memory when a MI_FLUSH_DW or PIPE_CONTROL is
> issued with the "synchronize GFDT" bit set.
> 
> So rather than making all scanout buffers uncached, set the GFDT
> bit in their PTEs, and modify the ring flush functions to enable
> the "synchronize GFDT" bit.
> 
> On HSW the GFDT bit was removed from the PTE, and it's only present in
> surface state, so we can't really set it from the kernel. Also the docs
> state that the hardware isn't actually guaranteed to respect the GFDT
> bit. So it looks like GFDT isn't all that useful on HSW.
> 
> So far I've tried this very quickly on an IVB machine, and
> it seems to be working as advertised. No idea if it does any
> good though.
> 
> TODO:
> - make sure there are no missing flushes (CPU access doesn't
>   respect GFDT after all).
> - measure it and see if there's some real benefit
> - maybe we can track whether "synchronize GFDT" needs to be
>   issued, and skip it when possible. needs some numbers to
> >   determine if it's worthwhile.
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>

Iirc when I've tried this out a while back it regressed a few benchmarks.
Chris and I suspected cache thrashing, but it's hard to tell without proper
instrumentation support. Chris played around with a few tricks to mark
other giant bos as uncacheable, but he couldn't find any improved
workloads.

In short I think this needs to come with decent amounts of numbers
attached, like the TODO says ;-)

The other thing was that I didn't manage to get things to work properly,
leaving some random cacheline dirt on the screen. Looking at your code, you
add the gfdt flush to every ring_flush, whereas I've tried to be clever
and only flushed after batches rendering to the frontbuffer. So probably a
bug in my code, or a flush on a given ring doesn't flush out caches for
one of the other engines.

And finally a bikeshed: I'd vote for a slightly more generic unsigned
long cache_flags instead of bool gfdt.

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  18 +++--
>  drivers/gpu/drm/i915/i915_gem.c            | 109 ++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_context.c    |   3 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  10 +--
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |  38 ++++++----
>  drivers/gpu/drm/i915/i915_reg.h            |   2 +
>  drivers/gpu/drm/i915/intel_display.c       |   4 +-
>  drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  25 +++++++
>  9 files changed, 167 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e95337c..537b344 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -408,7 +408,8 @@ struct i915_gtt {
>  	void (*gtt_insert_entries)(struct drm_device *dev,
>  				   struct sg_table *st,
>  				   unsigned int pg_start,
> -				   enum i915_cache_level cache_level);
> +				   enum i915_cache_level cache_level,
> +				   bool gfdt);
>  };
>  #define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
>  
> @@ -429,7 +430,8 @@ struct i915_hw_ppgtt {
>  	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
>  			       struct sg_table *st,
>  			       unsigned int pg_start,
> -			       enum i915_cache_level cache_level);
> +			       enum i915_cache_level cache_level,
> +			       bool gfdt);
>  	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
>  };
>  
> @@ -1169,6 +1171,7 @@ struct drm_i915_gem_object {
>  	unsigned int fenced_gpu_access:1;
>  
>  	unsigned int cache_level:2;
> +	unsigned int gfdt:1;
>  
>  	unsigned int has_aliasing_ppgtt_mapping:1;
>  	unsigned int has_global_gtt_mapping:1;
> @@ -1310,6 +1313,10 @@ struct drm_i915_file_private {
>  #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
>  #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
>  
> +/* Only SNB and IVB have GFDT in PTEs */
> +#define HAS_GFDT(dev)		(HAS_LLC(dev) && !IS_HASWELL(dev) && \
> +				 (IS_GEN6(dev) || IS_GEN7(dev)))
> +
>  #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
>  #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >=6 && !IS_VALLEYVIEW(dev))
>  
> @@ -1639,7 +1646,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
>  int __must_check
>  i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
>  int __must_check
> -i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> +i915_gem_object_pin_to_display_plane(struct drm_device *dev,
> +				     struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     struct intel_ring_buffer *pipelined);
>  int i915_gem_attach_phys_object(struct drm_device *dev,
> @@ -1681,14 +1689,14 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
>  void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  			    struct drm_i915_gem_object *obj,
> -			    enum i915_cache_level cache_level);
> +			    enum i915_cache_level cache_level, bool gfdt);
>  void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
>  			      struct drm_i915_gem_object *obj);
>  
>  void i915_gem_restore_gtt_mappings(struct drm_device *dev);
>  int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
>  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> -				enum i915_cache_level cache_level);
> +			      enum i915_cache_level cache_level, bool gfdt);
>  void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
>  void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
>  void i915_gem_init_global_gtt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8413ffc..c635430 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3035,8 +3035,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
>  	 * and flushes when moving into and out of the RENDER domain, correct
>  	 * snooping behaviour occurs naturally as the result of our domain
>  	 * tracking.
> +	 *
> +	 * Also flush all pinned objects with GFDT enabled. Such objects
> +	 * may be used for scanout, and the CPU doesn't know anything
> +	 * about GFDT.
>  	 */
> -	if (obj->cache_level != I915_CACHE_NONE)
> +	if (obj->cache_level != I915_CACHE_NONE &&
> +	    !(obj->gfdt && obj->pin_count))
>  		return;
>  
>  	trace_i915_gem_object_clflush(obj);
> @@ -3154,6 +3159,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	int ret;
> +	bool gfdt = obj->gfdt;
> +
> +	/* Clear GFDT when moving to uncached */
> +	if (cache_level == I915_CACHE_NONE)
> +		gfdt = false;
>  
>  	if (obj->cache_level == cache_level)
>  		return 0;
> @@ -3187,10 +3197,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  		}
>  
>  		if (obj->has_global_gtt_mapping)
> -			i915_gem_gtt_bind_object(obj, cache_level);
> +			i915_gem_gtt_bind_object(obj, cache_level, gfdt);
>  		if (obj->has_aliasing_ppgtt_mapping)
>  			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> -					       obj, cache_level);
> +					       obj, cache_level, gfdt);
>  
>  		obj->gtt_space->color = cache_level;
>  	}
> @@ -3219,6 +3229,65 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  	}
>  
>  	obj->cache_level = cache_level;
> +	obj->gfdt = gfdt;
> +	i915_gem_verify_gtt(dev);
> +	return 0;
> +}
> +
> +static int i915_gem_object_set_gfdt(struct drm_i915_gem_object *obj,
> +				    bool gfdt)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int ret;
> +
> +	if (!HAS_GFDT(dev))
> +		return -ENODEV;
> +
> +	if (obj->gfdt == gfdt)
> +		return 0;
> +
> +	/* no point in setting GFDT on uncached object */
> +	if (obj->cache_level == I915_CACHE_NONE)
> +		return -EINVAL;
> +
> +	if (obj->gtt_space) {
> +		ret = i915_gem_object_finish_gpu(obj);
> +		if (ret)
> +			return ret;
> +
> +		i915_gem_object_finish_gtt(obj);
> +
> +		if (obj->has_global_gtt_mapping)
> +			i915_gem_gtt_bind_object(obj, obj->cache_level, gfdt);
> +		if (obj->has_aliasing_ppgtt_mapping)
> +			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> +					       obj, obj->cache_level, gfdt);
> +	}
> +
> +	if (gfdt) {
> +		u32 old_read_domains, old_write_domain;
> +
> +		/*
> +		 * Since we're just now enabling GFDT there's most
> +		 * likely dirty data in the LLC. It must be flushed
> > +	 * to memory the old fashioned way.
> +		 */
> +		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
> +		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
> +
> +		old_read_domains = obj->base.read_domains;
> +		old_write_domain = obj->base.write_domain;
> +
> +		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +
> +		trace_i915_gem_object_change_domain(obj,
> +						    old_read_domains,
> +						    old_write_domain);
> +	}
> +
> +	obj->gfdt = gfdt;
>  	i915_gem_verify_gtt(dev);
>  	return 0;
>  }
> @@ -3291,7 +3360,8 @@ unlock:
>   * any flushes to be pipelined (for pageflips).
>   */
>  int
> -i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> +i915_gem_object_pin_to_display_plane(struct drm_device *dev,
> +				     struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     struct intel_ring_buffer *pipelined)
>  {
> @@ -3304,18 +3374,23 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  			return ret;
>  	}
>  
> -	/* The display engine is not coherent with the LLC cache on gen6.  As
> -	 * a result, we make sure that the pinning that is about to occur is
> -	 * done with uncached PTEs. This is lowest common denominator for all
> -	 * chipsets.
> -	 *
> -	 * However for gen6+, we could do better by using the GFDT bit instead
> -	 * of uncaching, which would allow us to flush all the LLC-cached data
> -	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
> +	/*
> > +	 * Try to set the GFDT bit instead of uncaching. This allows us to flush
> +	 * all the LLC-cached data with that bit in the PTE to main memory with
> +	 * just one PIPE_CONTROL.
>  	 */
> -	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> -	if (ret)
> -		return ret;
> +	ret = i915_gem_object_set_gfdt(obj, true);
> +	if (ret) {
> +		/*
> +		 * The display engine is not coherent with the LLC cache on gen6.  As
> +		 * a result, we make sure that the pinning that is about to occur is
> +		 * done with uncached PTEs. This is lowest common denominator for all
> +		 * chipsets.
> +		 */
> +		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	/* As the user may map the buffer once pinned in the display plane
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
> @@ -3499,11 +3574,11 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  			return ret;
>  
>  		if (!dev_priv->mm.aliasing_ppgtt)
> -			i915_gem_gtt_bind_object(obj, obj->cache_level);
> +			i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>  	}
>  
>  	if (!obj->has_global_gtt_mapping && map_and_fenceable)
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>  
>  	obj->pin_count++;
>  	obj->pin_mappable |= map_and_fenceable;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 21177d9..7244c0c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -382,7 +382,8 @@ static int do_switch(struct i915_hw_context *to)
>  	}
>  
>  	if (!to->obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
> +		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level,
> +					 to->obj->gfdt);
>  
>  	if (!to->is_initialized || is_default_context(to))
>  		hw_flags |= MI_RESTORE_INHIBIT;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2f2daeb..28a3c42 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -197,7 +197,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
>  	    !target_i915_obj->has_global_gtt_mapping)) {
>  		i915_gem_gtt_bind_object(target_i915_obj,
> -					 target_i915_obj->cache_level);
> +					 target_i915_obj->cache_level,
> +					 target_i915_obj->gfdt);
>  	}
>  
>  	/* Validate that the target is in a valid r/w GPU domain */
> @@ -432,7 +433,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
>  	/* Ensure ppgtt mapping exists if needed */
>  	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
>  		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
> -				       obj, obj->cache_level);
> +				       obj, obj->cache_level, obj->gfdt);
>  
>  		obj->has_aliasing_ppgtt_mapping = 1;
>  	}
> @@ -449,7 +450,7 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
>  
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
>  	    !obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>  
>  	return 0;
>  }
> @@ -1011,7 +1012,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	 * hsw should have this fixed, but let's be paranoid and do it
>  	 * unconditionally for now. */
>  	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
> -		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
> +		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level,
> +					 batch_obj->gfdt);
>  
>  	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
>  	if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 926a1e2..3c7e48b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -42,15 +42,20 @@ typedef uint32_t gtt_pte_t;
>  #define HSW_PTE_UNCACHED		(0)
>  #define GEN6_PTE_CACHE_LLC		(2 << 1)
>  #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
> +#define GEN6_PTE_GFDT			(1 << 3)
>  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
>  
>  static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
>  					dma_addr_t addr,
> -					enum i915_cache_level level)
> +					enum i915_cache_level level,
> +					bool gfdt)
>  {
>  	gtt_pte_t pte = GEN6_PTE_VALID;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
>  
> +	if (gfdt && HAS_GFDT(dev))
> +		pte |= GEN6_PTE_GFDT;
> +
>  	switch (level) {
>  	case I915_CACHE_LLC_MLC:
>  		/* Haswell doesn't set L3 this way */
> @@ -89,7 +94,7 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
>  
>  	scratch_pte = gen6_pte_encode(ppgtt->dev,
>  				      ppgtt->scratch_page_dma_addr,
> -				      I915_CACHE_LLC);
> +				      I915_CACHE_LLC, false);
>  
>  	while (num_entries) {
>  		last_pte = first_pte + num_entries;
> @@ -112,7 +117,8 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
>  static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
>  				      struct sg_table *pages,
>  				      unsigned first_entry,
> -				      enum i915_cache_level cache_level)
> +				      enum i915_cache_level cache_level,
> +				      bool gfdt)
>  {
>  	gtt_pte_t *pt_vaddr;
>  	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> @@ -133,7 +139,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
>  		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
>  			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
>  			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
> -						      cache_level);
> +						      cache_level, gfdt);
>  
>  			/* grab the next page */
>  			if (++m == segment_len) {
> @@ -279,11 +285,12 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>  
>  void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  			    struct drm_i915_gem_object *obj,
> -			    enum i915_cache_level cache_level)
> +			    enum i915_cache_level cache_level,
> +			    bool gfdt)
>  {
>  	ppgtt->insert_entries(ppgtt, obj->pages,
>  			      obj->gtt_space->start >> PAGE_SHIFT,
> -			      cache_level);
> +			      cache_level, gfdt);
>  }
>  
>  void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> @@ -401,7 +408,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
>  		i915_gem_clflush_object(obj);
> -		i915_gem_gtt_bind_object(obj, obj->cache_level);
> +		i915_gem_gtt_bind_object(obj, obj->cache_level, obj->gfdt);
>  	}
>  
>  	i915_gem_chipset_flush(dev);
> @@ -429,7 +436,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  static void gen6_ggtt_insert_entries(struct drm_device *dev,
>  				     struct sg_table *st,
>  				     unsigned int first_entry,
> -				     enum i915_cache_level level)
> +				     enum i915_cache_level level,
> +				     bool gfdt)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct scatterlist *sg = st->sgl;
> @@ -443,7 +451,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
>  		len = sg_dma_len(sg) >> PAGE_SHIFT;
>  		for (m = 0; m < len; m++) {
>  			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> -			iowrite32(gen6_pte_encode(dev, addr, level),
> +			iowrite32(gen6_pte_encode(dev, addr, level, gfdt),
>  				  &gtt_entries[i]);
>  			i++;
>  		}
> @@ -457,7 +465,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
>  	 */
>  	if (i != 0)
>  		WARN_ON(readl(&gtt_entries[i-1])
> -			!= gen6_pte_encode(dev, addr, level));
> +			!= gen6_pte_encode(dev, addr, level, gfdt));
>  
>  	/* This next bit makes the above posting read even more important. We
>  	 * want to flush the TLBs only after we're certain all the PTE updates
> @@ -483,7 +491,7 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
>  		num_entries = max_entries;
>  
>  	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
> -				      I915_CACHE_LLC);
> +				      I915_CACHE_LLC, false);
>  	for (i = 0; i < num_entries; i++)
>  		iowrite32(scratch_pte, &gtt_base[i]);
>  	readl(gtt_base);
> @@ -493,7 +501,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
>  static void i915_ggtt_insert_entries(struct drm_device *dev,
>  				     struct sg_table *st,
>  				     unsigned int pg_start,
> -				     enum i915_cache_level cache_level)
> +				     enum i915_cache_level cache_level,
> +				     bool gfdt)
>  {
>  	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
>  		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
> @@ -511,14 +520,15 @@ static void i915_ggtt_clear_range(struct drm_device *dev,
>  
>  
>  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> -			      enum i915_cache_level cache_level)
> +			      enum i915_cache_level cache_level,
> +			      bool gfdt)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
>  					 obj->gtt_space->start >> PAGE_SHIFT,
> -					 cache_level);
> +					 cache_level, gfdt);
>  
>  	obj->has_global_gtt_mapping = 1;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index cd226c2..6aa29d5 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -242,6 +242,7 @@
>  #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
>  #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
>  #define   MI_INVALIDATE_TLB		(1<<18)
> +#define   MI_SYNCHRONIZE_GFDT		(1<<17)
>  #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
>  #define   MI_INVALIDATE_BSD		(1<<7)
>  #define   MI_FLUSH_DW_USE_GTT		(1<<2)
> @@ -311,6 +312,7 @@
>  #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
>  #define   PIPE_CONTROL_CS_STALL				(1<<20)
>  #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
> +#define   PIPE_CONTROL_SYNCHRONIZE_GFDT			(1<<17)
>  #define   PIPE_CONTROL_QW_WRITE				(1<<14)
>  #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
>  #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d912306..fc99d55 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -1968,7 +1968,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
>  	}
>  
>  	dev_priv->mm.interruptible = false;
> -	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
> +	ret = i915_gem_object_pin_to_display_plane(dev, obj, alignment, pipelined);
>  	if (ret)
>  		goto err_interruptible;
>  
> @@ -6340,7 +6340,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
>  			goto fail_locked;
>  		}
>  
> -		ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
> +		ret = i915_gem_object_pin_to_display_plane(dev, obj, 0, NULL);
>  		if (ret) {
>  			DRM_ERROR("failed to move cursor bo into the GTT\n");
>  			goto fail_locked;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 67a2501..b4110a2 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -695,7 +695,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	if (ret != 0)
>  		return ret;
>  
> -	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
> +	ret = i915_gem_object_pin_to_display_plane(dev, new_bo, 0, NULL);
>  	if (ret != 0)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 1d5d613..8406247 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -234,6 +234,12 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>  		 * when the render cache is indeed flushed.
>  		 */
>  		flags |= PIPE_CONTROL_CS_STALL;
> +
> +		/* Flush GFDT out to memory */
> +		if (HAS_GFDT(ring->dev)) {
> +			flags |= PIPE_CONTROL_QW_WRITE;
> +			flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
> +		}
>  	}
>  	if (invalidate_domains) {
>  		flags |= PIPE_CONTROL_TLB_INVALIDATE;
> @@ -306,6 +312,13 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
>  	if (flush_domains) {
>  		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
>  		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +
> +		/* Flush GFDT out to memory */
> +		if (HAS_GFDT(ring->dev)) {
> +			flags |= PIPE_CONTROL_QW_WRITE;
> +			flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +			flags |= PIPE_CONTROL_SYNCHRONIZE_GFDT;
> +		}
>  	}
>  	if (invalidate_domains) {
>  		flags |= PIPE_CONTROL_TLB_INVALIDATE;
> @@ -1557,6 +1570,12 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
>  		return ret;
>  
>  	cmd = MI_FLUSH_DW;
> +
> +	/* Flush GFDT out to memory */
> +	if (flush & I915_GEM_GPU_DOMAINS && HAS_GFDT(ring->dev))
> +		cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
> +			MI_SYNCHRONIZE_GFDT;
> +
>  	/*
>  	 * Bspec vol 1c.5 - video engine command streamer:
>  	 * "If ENABLED, all TLBs will be invalidated once the flush
> @@ -1629,6 +1648,12 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
>  		return ret;
>  
>  	cmd = MI_FLUSH_DW;
> +
> +	/* Flush GFDT out to memory */
> +	if (flush & I915_GEM_DOMAIN_RENDER && HAS_GFDT(ring->dev))
> +		cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
> +			MI_SYNCHRONIZE_GFDT;
> +
>  	/*
>  	 * Bspec vol 1c.3 - blitter engine command streamer:
>  	 * "If ENABLED, all TLBs will be invalidated once the flush
> -- 
> 1.7.12.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch



More information about the Intel-gfx mailing list