[Intel-gfx] [PATCH 1/4] drm/i915: Get active pending request for given context
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Sep 21 12:39:51 UTC 2018
On 21/09/2018 10:13, kedar.j.karanje at intel.com wrote:
> From: Praveen Diwakar <praveen.diwakar at intel.com>
>
> This patch gives us the count of active pending requests which are yet
> to be submitted to the GPU
>
> Change-Id: I10c2828ad0f1a0b7af147835737134e07a2d5b6d
> Signed-off-by: Praveen Diwakar <praveen.diwakar at intel.com>
> Signed-off-by: Yogesh Marathe <yogesh.marathe at intel.com>
> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar at intel.com>
> Signed-off-by: Kedar J Karanje <kedar.j.karanje at intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 5 +++++
> drivers/gpu/drm/i915/i915_gem_context.c | 1 +
> drivers/gpu/drm/i915/i915_gem_context.h | 6 ++++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++++
> drivers/gpu/drm/i915/intel_lrc.c | 6 ++++++
> 6 files changed, 24 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f8cfd16..d37c46e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -903,6 +903,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
> mutex_init(&dev_priv->av_mutex);
> mutex_init(&dev_priv->wm.wm_mutex);
> mutex_init(&dev_priv->pps_mutex);
> + mutex_init(&dev_priv->pred_mutex);
>
> i915_memcpy_init_early(dev_priv);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4aca534..137ec33 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1609,6 +1609,11 @@ struct drm_i915_private {
> * controller on different i2c buses. */
> struct mutex gmbus_mutex;
>
> + /** pred_mutex protects against concurrent usage of pending
> + * request counter for multiple contexts
> + */
> + struct mutex pred_mutex;
> +
> /**
> * Base address of the gmbus and gpio block.
> */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b10770c..30932d9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -387,6 +387,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> }
>
> trace_i915_context_create(ctx);
> + ctx->req_cnt = 0;
>
> return ctx;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index b116e49..243ea22 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -194,6 +194,12 @@ struct i915_gem_context {
> * context close.
> */
> struct list_head handles_list;
> +
> + /** req_cnt: tracks the pending commands, based on which we decide to
> + * go for low/medium/high load configuration of the GPU, this is
> + * controlled via a mutex
> + */
> + u64 req_cnt;
64-bit is too wide and mutex causes you problems later in the series.
I'd suggest atomic_t.
> };
>
> static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3f0c612..f799ff9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2178,6 +2178,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> struct drm_syncobj **fences)
> {
> struct i915_execbuffer eb;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> struct dma_fence *in_fence = NULL;
> struct sync_file *out_fence = NULL;
> int out_fence_fd = -1;
> @@ -2390,6 +2391,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> */
> eb.request->batch = eb.batch;
>
> + mutex_lock(&dev_priv->pred_mutex);
> + eb.ctx->req_cnt++;
> + mutex_unlock(&dev_priv->pred_mutex);
> +
> trace_i915_request_queue(eb.request, eb.batch_flags);
> err = eb_submit(&eb);
> err_request:
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1744792..039fbdb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -728,6 +728,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> trace_i915_request_in(rq, port_index(port, execlists));
> last = rq;
> submit = true;
> +
> + mutex_lock(&rq->i915->pred_mutex);
> + if (rq->gem_context->req_cnt > 0) {
Presumably you hit underflow here and then added the protection. This
was a hint the accounting does not work as expected. Preemption for
instance would wreak havoc with it. :)
Have a look at my per engine queued/runnable counters for how to do it
correctly, except that you would need to make it per context and merge
into one aggregated queued+runnable counter instead of two separate
counters, if you wanted to preserve your heuristics.
https://patchwork.freedesktop.org/patch/227981/
https://patchwork.freedesktop.org/patch/227976/
Regards,
Tvrtko
> + rq->gem_context->req_cnt--;
> + }
> + mutex_unlock(&rq->i915->pred_mutex);
> }
>
> rb_erase_cached(&p->node, &execlists->queue);
>
More information about the Intel-gfx
mailing list