[Intel-gfx] [29/33] drm/i915/gt: Expose busywait duration to sysfs
Steve Carbonari
steven.carbonari at intel.com
Wed Jan 22 21:29:14 UTC 2020
On Thu, Dec 12, 2019 at 02:04:55PM +0000, Chris Wilson wrote:
> We busywait on an inflight request (one that is currently executing on
> HW, and so might complete quickly) prior to setting up an interrupt and
> sleeping. The trade off is that we keep an expensive CPU core busy in
> order to avoid wake up latency: where that trade off should lie is best
> left to the sysadmin.
>
> The busywait mechanism can be compiled out with
>
> ./scripts/config --set-val DRM_I915_SPIN_REQUEST 0
>
> The maximum busywait duration can be adjusted per-engine using,
>
> /sys/class/drm/card?/engine/*/ms_busywait_duration_ns
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Code looks good.
Tested the patch with previous sysfs related patches in the series.
The directories and files show up with expected values.
Busywait can be changed.
Reviewed-by: Steve Carbonari <steven.carbonari at intel.com>
Tested-by: Steve Carbonari <steven.carbonari at intel.com
> ---
> drivers/gpu/drm/i915/Kconfig.profile | 9 ++--
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +
> drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 49 ++++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
> drivers/gpu/drm/i915/i915_request.c | 19 ++++----
> 5 files changed, 68 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index d8d4a16179bd..9ee3b59685b9 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -35,9 +35,9 @@ config DRM_I915_PREEMPT_TIMEOUT
>
> May be 0 to disable the timeout.
>
> -config DRM_I915_SPIN_REQUEST
> - int "Busywait for request completion (us)"
> - default 5 # microseconds
> +config DRM_I915_MAX_REQUEST_BUSYWAIT
> + int "Busywait for request completion limit (ns)"
> + default 8000 # nanoseconds
> help
> Before sleeping waiting for a request (GPU operation) to complete,
> we may spend some time polling for its completion. As the IRQ may
> @@ -45,6 +45,9 @@ config DRM_I915_SPIN_REQUEST
> check if the request will complete in the time it would have taken
> us to enable the interrupt.
>
> + This is adjustable via
> + /sys/class/drm/card?/engine/*/max_busywait_duration_ns
> +
> May be 0 to disable the initial spin. In practice, we estimate
> the cost of enabling the interrupt (if currently disabled) to be
> a few microseconds.
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 9e16d8a9bbf5..9162b94a7b80 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -312,6 +312,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>
> engine->props.heartbeat_interval_ms =
> CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
> + engine->props.max_busywait_duration_ns =
> + CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
> engine->props.preempt_timeout_ms =
> CONFIG_DRM_I915_PREEMPT_TIMEOUT;
> engine->props.stop_timeout_ms =
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> index b1bd768b13d7..6d87529c64a7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> @@ -142,6 +142,54 @@ all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> static struct kobj_attribute all_caps_attr =
> __ATTR(known_capabilities, 0444, all_caps_show, NULL);
>
> +static ssize_t
> +max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct intel_engine_cs *engine = kobj_to_engine(kobj);
> + unsigned long long duration;
> + int err;
> +
> + /*
> + * When waiting for a request, if is it currently being executed
> + * on the GPU, we busywait for a short while before sleeping. The
> + * premise is that most requests are short, and if it is already
> + * executing then there is a good chance that it will complete
> + * before we can setup the interrupt handler and go to sleep.
> + * We try to offset the cost of going to sleep, by first spinning
> + * on the request -- if it completed in less time than it would take
> + * to go sleep, process the interrupt and return back to the client,
> + * then we have saved the client some latency, albeit at the cost
> + * of spinning on an expensive CPU core.
> + *
> + * While we try to avoid waiting at all for a request that is unlikely
> + * to complete, deciding how long it is worth spinning is for is an
> + * arbitrary decision: trading off power vs latency.
> + */
> +
> + err = kstrtoull(buf, 0, &duration);
> + if (err)
> + return err;
> +
> + if (duration > jiffies_to_nsecs(2))
> + return -EINVAL;
> +
> + WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
> +
> + return count;
> +}
> +
> +static ssize_t
> +max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> + struct intel_engine_cs *engine = kobj_to_engine(kobj);
> +
> + return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
> +}
> +
> +static struct kobj_attribute max_spin_attr =
> +__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
> +
> static ssize_t
> timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
> const char *buf, size_t count)
> @@ -224,6 +272,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
> &mmio_attr.attr,
> &caps_attr.attr,
> &all_caps_attr.attr,
> + &max_spin_attr.attr,
> NULL
> };
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index ede7ce2695cd..807796d619e9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -525,6 +525,7 @@ struct intel_engine_cs {
>
> struct {
> unsigned long heartbeat_interval_ms;
> + unsigned long max_busywait_duration_ns;
> unsigned long preempt_timeout_ms;
> unsigned long stop_timeout_ms;
> unsigned long timeslice_duration_ms;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 15767c2d9ea1..2f6979009c8f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1381,7 +1381,7 @@ void i915_request_add(struct i915_request *rq)
> mutex_unlock(&tl->mutex);
> }
>
> -static unsigned long local_clock_us(unsigned int *cpu)
> +static unsigned long local_clock_ns(unsigned int *cpu)
> {
> unsigned long t;
>
> @@ -1398,7 +1398,7 @@ static unsigned long local_clock_us(unsigned int *cpu)
> * stop busywaiting, see busywait_stop().
> */
> *cpu = get_cpu();
> - t = local_clock() >> 10;
> + t = local_clock();
> put_cpu();
>
> return t;
> @@ -1408,15 +1408,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
> {
> unsigned int this_cpu;
>
> - if (time_after(local_clock_us(&this_cpu), timeout))
> + if (time_after(local_clock_ns(&this_cpu), timeout))
> return true;
>
> return this_cpu != cpu;
> }
>
> -static bool __i915_spin_request(const struct i915_request * const rq,
> - int state, unsigned long timeout_us)
> +static bool __i915_spin_request(const struct i915_request * const rq, int state)
> {
> + unsigned long timeout_ns;
> unsigned int cpu;
>
> /*
> @@ -1444,7 +1444,8 @@ static bool __i915_spin_request(const struct i915_request * const rq,
> * takes to sleep on a request, on the order of a microsecond.
> */
>
> - timeout_us += local_clock_us(&cpu);
> + timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
> + timeout_ns += local_clock_ns(&cpu);
> do {
> if (i915_request_completed(rq))
> return true;
> @@ -1452,7 +1453,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
> if (signal_pending_state(state, current))
> break;
>
> - if (busywait_stop(timeout_us, cpu))
> + if (busywait_stop(timeout_ns, cpu))
> break;
>
> cpu_relax();
> @@ -1538,8 +1539,8 @@ long i915_request_wait(struct i915_request *rq,
> * completion. That requires having a good predictor for the request
> * duration, which we currently lack.
> */
> - if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
> - __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
> + if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
> + __i915_spin_request(rq, state)) {
> dma_fence_signal(&rq->fence);
> goto out;
> }
More information about the Intel-gfx
mailing list