[Intel-gfx] [PATCH] drm/i915/display: Apply interactive priority to explicit flip fences
Ville Syrjälä
ville.syrjala at linux.intel.com
Tue Jan 19 16:42:36 UTC 2021
On Mon, Jan 18, 2021 at 11:59:29AM +0000, Chris Wilson wrote:
> Currently, if a modeset/pageflip needs to wait for rendering to an object
> to complete, we boost the priority of that rendering above all other work.
> We can apply the same interactive priority boosting to explicit fences
> that we can unwrap into a native i915_request (i.e. sync_file).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/display/intel_display.c | 16 +++----
> drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 ++
> drivers/gpu/drm/i915/gem/i915_gem_wait.c | 49 ++++++++++++++------
> 3 files changed, 44 insertions(+), 24 deletions(-)
>
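Before diving into the diff: as I read it, for a single native fence the
boost boils down to unwrapping the dma_fence back into its i915_request
and reprioritising it via the engine scheduler, roughly like this
(untested sketch, helper name made up, just restating fence_set_priority()
from the patch below in isolation):

	static bool boost_native_fence(struct dma_fence *fence,
				       const struct i915_sched_attr *attr)
	{
		struct i915_request *rq;

		/* Skip foreign fences and work that has already completed */
		if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
			return false;

		rq = to_request(fence); /* unwrap to the native request */

		rcu_read_lock(); /* RCU serialisation for set-wedged protection */
		if (rq->engine->schedule)
			rq->engine->schedule(rq, attr); /* bump its priority */
		rcu_read_unlock();

		return true;
	}

Containers (sync_file fence arrays, fence chains) are unwrapped one level
and each element is fed through the same helper.
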
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index b728792e0c27..3a6b2e79c68b 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -13514,15 +13514,6 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
> intel_unpin_fb_vma(vma, old_plane_state->flags);
> }
>
> -static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
> -{
> - struct i915_sched_attr attr = {
> - .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
> - };
> -
> - i915_gem_object_wait_priority(obj, 0, &attr);
> -}
> -
> /**
> * intel_prepare_plane_fb - Prepare fb for usage on plane
> * @_plane: drm plane to prepare for
> @@ -13539,6 +13530,9 @@ int
> intel_prepare_plane_fb(struct drm_plane *_plane,
> struct drm_plane_state *_new_plane_state)
> {
> + struct i915_sched_attr attr = {
> + .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
> + };
> struct intel_plane *plane = to_intel_plane(_plane);
> struct intel_plane_state *new_plane_state =
> to_intel_plane_state(_new_plane_state);
> @@ -13578,6 +13572,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
> }
>
> if (new_plane_state->uapi.fence) { /* explicit fencing */
> + i915_gem_fence_wait_priority(new_plane_state->uapi.fence,
> + &attr);
> ret = i915_sw_fence_await_dma_fence(&state->commit_ready,
> new_plane_state->uapi.fence,
> i915_fence_timeout(dev_priv),
> @@ -13599,7 +13595,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
> if (ret)
> return ret;
>
> - fb_obj_bump_render_priority(obj);
> + i915_gem_object_wait_priority(obj, 0, &attr);
> i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
>
> if (!new_plane_state->uapi.fence) { /* implicit fencing */
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index be14486f63a7..31d05bfa57ce 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -512,6 +512,9 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
> obj->cache_dirty = true;
> }
>
> +void i915_gem_fence_wait_priority(struct dma_fence *fence,
> + const struct i915_sched_attr *attr);
> +
> int i915_gem_object_wait(struct drm_i915_gem_object *obj,
> unsigned int flags,
> long timeout);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> index c1b13ac50d0f..3078f3a2864b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> @@ -5,6 +5,7 @@
> */
>
> #include <linux/dma-fence-array.h>
> +#include <linux/dma-fence-chain.h>
> #include <linux/jiffies.h>
>
> #include "gt/intel_engine.h"
> @@ -44,8 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
> unsigned int count, i;
> int ret;
>
> - ret = dma_resv_get_fences_rcu(resv,
> - &excl, &count, &shared);
> + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared);
> if (ret)
> return ret;
>
> @@ -91,39 +91,60 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
> return timeout;
> }
>
> -static void __fence_set_priority(struct dma_fence *fence,
> - const struct i915_sched_attr *attr)
> +static bool fence_set_priority(struct dma_fence *fence,
> + const struct i915_sched_attr *attr)
> {
> struct i915_request *rq;
> struct intel_engine_cs *engine;
>
> if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
> - return;
> + return false;
>
> rq = to_request(fence);
> engine = rq->engine;
>
> - local_bh_disable();
> rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> if (engine->schedule)
> engine->schedule(rq, attr);
> rcu_read_unlock();
> - local_bh_enable(); /* kick the tasklets if queues were reprioritised */
> +
> + return true;
> }
>
> -static void fence_set_priority(struct dma_fence *fence,
> - const struct i915_sched_attr *attr)
> +static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
> {
> +	return fence->ops == &dma_fence_chain_ops;
> +}
> +
> +void i915_gem_fence_wait_priority(struct dma_fence *fence,
> + const struct i915_sched_attr *attr)
> +{
> + if (dma_fence_is_signaled(fence))
> + return;
> +
> + local_bh_disable();
> +
> /* Recurse once into a fence-array */
> if (dma_fence_is_array(fence)) {
> struct dma_fence_array *array = to_dma_fence_array(fence);
> int i;
>
> for (i = 0; i < array->num_fences; i++)
> - __fence_set_priority(array->fences[i], attr);
> + fence_set_priority(array->fences[i], attr);
> + } else if (__dma_fence_is_chain(fence)) {
> + struct dma_fence *iter;
> +
> + dma_fence_chain_for_each(iter, fence) {
> + if (!fence_set_priority(to_dma_fence_chain(iter)->fence,
> + attr))
> + break;
Does this mean the fence chain is ordered in some way, i.e. the
rest of the fences in the chain will have been signalled already?
I couldn't find any description of what a fence chain really is
anywhere.
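
If the answer is yes, I guess the early break relies on something along
these lines (untested, purely to check my understanding of
dma_fence_chain_for_each(); the helper name is made up, and it reuses
fence_set_priority() from this patch):

	static void boost_chain(struct dma_fence *head,
				const struct i915_sched_attr *attr)
	{
		struct dma_fence *iter;

		/*
		 * Walk from the given link back towards older points. Stop
		 * at the first payload that has already signalled (or is
		 * not an i915 fence), assuming everything older in the
		 * chain no longer needs boosting.
		 */
		dma_fence_chain_for_each(iter, head) {
			struct dma_fence_chain *link = to_dma_fence_chain(iter);

			/* the oldest ->prev may not be a chain link itself */
			if (!link || !fence_set_priority(link->fence, attr))
				break;
		}
		dma_fence_put(iter); /* drop the ref held across the break */
	}
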
Otherwise looks sensible to me.
Reviewed-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> + }
> + dma_fence_put(iter);
> } else {
> - __fence_set_priority(fence, attr);
> + fence_set_priority(fence, attr);
> }
> +
> + local_bh_enable(); /* kick the tasklets if queues were reprioritised */
> }
>
> int
> @@ -139,12 +160,12 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
> int ret;
>
> ret = dma_resv_get_fences_rcu(obj->base.resv,
> - &excl, &count, &shared);
> + &excl, &count, &shared);
> if (ret)
> return ret;
>
> for (i = 0; i < count; i++) {
> - fence_set_priority(shared[i], attr);
> + i915_gem_fence_wait_priority(shared[i], attr);
> dma_fence_put(shared[i]);
> }
>
> @@ -154,7 +175,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
> }
>
> if (excl) {
> - fence_set_priority(excl, attr);
> + i915_gem_fence_wait_priority(excl, attr);
> dma_fence_put(excl);
> }
> return 0;
> --
> 2.20.1
--
Ville Syrjälä
Intel