[Intel-gfx] [PATCH v3] drm/i915: Limit C-states when waiting for the active request

Mon Jul 30 13:07:07 UTC 2018

Chris Wilson <chris at chris-wilson.co.uk> writes:

> If we are waiting for the currently executing request, we have a good
> idea that it will be completed in the very near future and so want to
> cap the CPU_DMA_LATENCY to ensure that we wake up the client quickly.
>
> v2: Not allowed to block in kmalloc after setting TASK_INTERRUPTIBLE.
> v3: Avoid the blocking notifier as well for TASK_INTERRUPTIBLE
>
> Testcase: igt/gem_sync/store-default
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Eero Tamminen <eero.t.tamminen at intel.com>
> Cc: Francisco Jerez <currojerez at riseup.net>
> ---
>  drivers/gpu/drm/i915/i915_request.c | 38 +++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 5c2c93cbab12..7c7746ef0d1b 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1258,6 +1258,28 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request)
>  	return true;
>  }
>  
> +struct pm_qos {
> +	struct pm_qos_request req;
> +	struct work_struct add, del;
> +};
> +
> +static void pm_qos_add(struct work_struct *work)
> +{
> +	struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), add);
> +
> +	pm_qos_add_request(&pm_qos->req, PM_QOS_CPU_DMA_LATENCY, 50);
> +}
> +
> +static void pm_qos_del(struct work_struct *work)
> +{
> +	struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), del);
> +
> +	if (!cancel_work_sync(&pm_qos->add))
> +		pm_qos_remove_request(&pm_qos->req);
> +
> +	kfree(pm_qos);
> +}
> +
>  /**
>   * i915_request_wait - wait until execution of request has finished
>   * @rq: the request to wait upon
> @@ -1286,6 +1308,7 @@ long i915_request_wait(struct i915_request *rq,
>  	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
>  	DEFINE_WAIT_FUNC(reset, default_wake_function);
>  	DEFINE_WAIT_FUNC(exec, default_wake_function);
> +	struct pm_qos *pm_qos = NULL;
>  	struct intel_wait wait;
>  
>  	might_sleep();
> @@ -1363,6 +1386,19 @@ long i915_request_wait(struct i915_request *rq,
>  			break;
>  		}
>  
> +		if (!pm_qos &&
> +		    i915_seqno_passed(intel_engine_get_seqno(rq->engine),
> +				      wait.seqno - 1)) {
> +			pm_qos = kzalloc(sizeof(*pm_qos),
> +					 GFP_NOWAIT | __GFP_NOWARN);
> +			if (pm_qos) {
> +				INIT_WORK(&pm_qos->add, pm_qos_add);
> +				INIT_WORK(&pm_qos->del, pm_qos_del);
> +				schedule_work_on(smp_processor_id(),
> +						 &pm_qos->add);
> +			}

My immediate thoughts are on why this and not preallocate pm_qos
on init/fini and just add/remove requests on suitable spots?

-Mika

> +		}
> +
>  		timeout = io_schedule_timeout(timeout);
>  
>  		if (intel_wait_complete(&wait) &&
> @@ -1412,6 +1448,8 @@ long i915_request_wait(struct i915_request *rq,
>  	if (flags & I915_WAIT_LOCKED)
>  		remove_wait_queue(errq, &reset);
>  	remove_wait_queue(&rq->execute, &exec);
> +	if (pm_qos)
> +		schedule_work(&pm_qos->del);
>  	trace_i915_request_wait_end(rq);
>  
>  	return timeout;
> -- 
> 2.18.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx