[Intel-gfx] [PATCH 5/5] drm/i915: Prioritise non-busywait semaphore workloads

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Mar 1 11:48:25 UTC 2019


On 01/03/2019 11:05, Chris Wilson wrote:
> We don't want to busywait on the GPU if we have other work to do. If we
> give non-busywaiting workloads higher (initial) priority than workloads
> that require a busywait, we will prioritise work that is ready to run
> immediately. We then also have to be careful that we don't give earlier
> semaphores an accidental boost because later work doesn't wait on other
> rings, hence we keep a history of semaphore usage of the dependency chain.
> 
> Testcase: igt/gem_exec_schedule/semaphore
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++++
>   drivers/gpu/drm/i915/i915_scheduler.c |  5 +++++
>   drivers/gpu/drm/i915/i915_scheduler.h |  9 ++++++---
>   drivers/gpu/drm/i915/intel_lrc.c      |  2 +-
>   4 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 59e30b8c4ee9..1524de65b37f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -813,6 +813,7 @@ emit_semaphore_wait(struct i915_request *to,
>   	*cs++ = 0;
>   
>   	intel_ring_advance(to, cs);
> +	to->sched.semaphore |= I915_SCHED_HAS_SEMAPHORE;
>   	return 0;
>   }
>   
> @@ -1083,6 +1084,21 @@ void i915_request_add(struct i915_request *request)
>   	if (engine->schedule) {
>   		struct i915_sched_attr attr = request->gem_context->sched;
>   
> +		/*
> +		 * Boost actual workloads past semaphores!
> +		 *
> +		 * With semaphores we spin on one engine waiting for another,
> +		 * simply to reduce the latency of starting our work when
> +		 * the signaler completes. However, if there is any other
> +		 * work that we could be doing on this engine instead, that
> +		 * is better utilisation and will reduce the overall duration
> +		 * of the current work. To avoid PI boosting a semaphore
> +		 * far in the distant past over useful work, we keep a history
> +		 * of any semaphore use along our dependency chain.
> +		 */
> +		if (!request->sched.semaphore)
> +			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> +
>   		/*
>   		 * Boost priorities to new clients (new request flows).
>   		 *
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 50018ad30233..fd684b9ed108 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -39,6 +39,7 @@ void i915_sched_node_init(struct i915_sched_node *node)
>   	INIT_LIST_HEAD(&node->waiters_list);
>   	INIT_LIST_HEAD(&node->link);
>   	node->attr.priority = I915_PRIORITY_INVALID;
> +	node->semaphore = 0;
>   }
>   
>   static struct i915_dependency *
> @@ -69,6 +70,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
>   		dep->signaler = signal;
>   		dep->flags = flags;
>   
> +		/* Keep track of whether anyone on this chain has a semaphore */
> +		if (signal->semaphore && !node_started(signal))
> +			node->semaphore |= signal->semaphore << 1;

Maybe I am confused... this shifts the bit flag one position to the left 
for every hop along the dependency chain. Can't it fall off the end of 
the unsigned long, so that a deep enough chain ends up with zero and 
looks semaphore-free again?
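
For illustration, a minimal userspace sketch of the scenario I have in 
mind (my own example, not from the patch; assuming a 64-bit unsigned 
long):

  #include <stdio.h>

  int main(void)
  {
          unsigned long sem = 1UL; /* I915_SCHED_HAS_SEMAPHORE at the head */
          int hop;

          /* one left shift per dependency hop, as in the hunk above */
          for (hop = 0; hop < 64; hop++)
                  sem <<= 1;

          /* prints 0: the history bit has been shifted out entirely,
           * so a later !request->sched.semaphore check would grant the
           * NOSEMAPHORE boost despite the semaphore up the chain */
          printf("history after 64 hops: %#lx\n", sem);
          return 0;
  }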

> +
>   		ret = true;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 7d4a49750d92..068a6750540f 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -24,14 +24,15 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 2
> +#define I915_USER_PRIORITY_SHIFT 3
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
>   
> -#define I915_PRIORITY_WAIT	((u8)BIT(0))
> -#define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
> +#define I915_PRIORITY_WAIT		((u8)BIT(0))
> +#define I915_PRIORITY_NEWCLIENT		((u8)BIT(1))
> +#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(2))
>   
>   #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
>   
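
As an aside for anyone following along, spelling out the new bit layout 
implied by the hunk above (my own sketch, not from the patch): the low 
three bits now carry the internal bonuses and user priority starts at 
bit 3, e.g.

  prio = I915_USER_PRIORITY(user_prio)    /* user_prio << 3 */
       | I915_PRIORITY_NEWCLIENT          /* BIT(1) */
       | I915_PRIORITY_NOSEMAPHORE;       /* BIT(2) */
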
> @@ -74,6 +75,8 @@ struct i915_sched_node {
>   	struct list_head waiters_list; /* those after us, they depend upon us */
>   	struct list_head link;
>   	struct i915_sched_attr attr;
> +	unsigned long semaphore;
> +#define I915_SCHED_HAS_SEMAPHORE	BIT(0)

unsigned int?
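
The history only ever holds I915_SCHED_HAS_SEMAPHORE shifted left once 
per hop, so a fixed 32-bit field would behave the same on 32-bit and 
64-bit builds. A sketch of what I mean (hypothetical, not compile-tested 
against the series):

  struct i915_sched_node {
          struct list_head signalers_list;
          struct list_head waiters_list;
          struct list_head link;
          struct i915_sched_attr attr;
          unsigned int semaphore; /* per-hop shifted semaphore history */
  #define I915_SCHED_HAS_SEMAPHORE        BIT(0)
  };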

>   };
>   
>   struct i915_dependency {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 53d6f7fdb50e..2268860cca44 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -164,7 +164,7 @@
>   #define WA_TAIL_DWORDS 2
>   #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
>   
> -#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
> +#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
>   
>   static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   					    struct intel_engine_cs *engine,
> 

Regards,

Tvrtko

