[Intel-gfx] [PATCH 4/4] drm/i915: Opportunistically reduce flushing at execbuf

Ville Syrjälä ville.syrjala at linux.intel.com
Sun Dec 14 05:12:21 PST 2014


On Sat, Dec 13, 2014 at 07:08:24PM -0800, Ben Widawsky wrote:
> If we're moving a bunch of buffers from the CPU domain to the GPU domain, and
> we've already blown out the entire cache via a wbinvd, there is nothing more to
> do.
> 
> With this and the previous patches, I am seeing a 3x FPS increase on a certain
> benchmark which uses a giant 2d array texture. Unless I missed something in the
> code, it should only affect non-LLC i915 platforms.
> 
> I haven't yet run any numbers for other benchmarks, nor have I attempted to
> check if various conformance tests still pass.
> 
> NOTE: As mentioned in the previous patch, if one can easily obtain the largest
> buffer and attempt to flush it first, the results would be even more desirable.

So even with that optimization, if you only have tons of small buffers
that need to be flushed, you'd still take the clflush path for every
single one.

How difficult would it be to calculate the total size to be flushed first,
and then make the clflush vs. wbinvd decision based on that?

> 
> Cc: DRI Development <dri-devel at lists.freedesktop.org>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  3 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 12 +++++-------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++++---
>  drivers/gpu/drm/i915/intel_lrc.c           |  8 +++++---
>  4 files changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d68c75f..fdb92a3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2642,7 +2642,8 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
>  }
>  
>  void i915_gem_reset(struct drm_device *dev);
> -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
> +enum drm_cache_flush
> +i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int i915_gem_init_rings(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index de241eb..3746738 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3608,7 +3608,7 @@ err_unpin:
>  	return vma;
>  }
>  
> -bool
> +enum drm_cache_flush
>  i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  			bool force)
>  {
> @@ -3617,14 +3617,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * again at bind time.
>  	 */
>  	if (obj->pages == NULL)
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	/*
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
>  	if (obj->stolen || obj->phys_handle)
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
>  	 * we do not need to manually clear the CPU cache lines.  However,
> @@ -3635,12 +3635,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * tracking.
>  	 */
>  	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
> -		return false;
> +		return DRM_CACHE_FLUSH_NONE;
>  
>  	trace_i915_gem_object_clflush(obj);
> -	drm_clflush_sg(obj->pages);
> -
> -	return true;
> +	return drm_clflush_sg(obj->pages);
>  }
>  
>  /** Flushes the GTT write domain for the object if it's dirty. */
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0c25f62..e8eb9e9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -827,7 +827,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  {
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
> -	bool flush_chipset = false;
> +	enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> @@ -836,8 +836,10 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  		if (ret)
>  			return ret;
>  
> -		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
> -			flush_chipset |= i915_gem_clflush_object(obj, false);
> +		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
> +		    flush_chipset != DRM_CACHE_FLUSH_WBINVD) {
> +			flush_chipset = i915_gem_clflush_object(obj, false);
> +		}
>  
>  		flush_domains |= obj->base.write_domain;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 89b5577..a6c6ebd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -611,7 +611,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  	struct intel_engine_cs *ring = ringbuf->ring;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
> -	bool flush_chipset = false;
> +	enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
> @@ -621,8 +621,10 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
>  		if (ret)
>  			return ret;
>  
> -		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
> -			flush_chipset |= i915_gem_clflush_object(obj, false);
> +		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
> +		    flush_chipset != DRM_CACHE_FLUSH_WBINVD) {
> +			flush_chipset = i915_gem_clflush_object(obj, false);
> +		}
>  
>  		flush_domains |= obj->base.write_domain;
>  	}
> -- 
> 2.1.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC


More information about the Intel-gfx mailing list