[Intel-gfx] [PATCH v3 3/8] drm/i915/guc: Submit GuC workitems containing coalesced requests
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Tue May 23 00:07:35 UTC 2017
On 22/05/17 15:07, Michał Winiarski wrote:
> To create an upper bound on number of GuC workitems, we need to change
> the way that requests are being submitted. Rather than submitting each
> request as an individual workitem, we can do coalescing in a similar
> way to how we're handling execlist submission ports. We also need to
> stop pretending that we're doing "lite-restore" in GuC submission (we
> would otherwise create a workitem each time we hit this condition).
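
Just to spell out the bound for readers: with the coalescing,
consecutive requests from the same context are merged into a single
port and one workitem is emitted per port, so at most
ARRAY_SIZE(execlist_port) workitems are in flight at any time. A
standalone toy model of that (made-up names, not driver code):

    #include <stdio.h>

    #define NUM_PORTS 2 /* stands in for ARRAY_SIZE(execlist_port) */

    struct request { int ctx_id; };

    /* Stand-in for the context comparison in the real dequeue loop. */
    static int can_merge(const struct request *a, const struct request *b)
    {
            return a->ctx_id == b->ctx_id;
    }

    int main(void)
    {
            /* Five queued requests across two contexts. */
            struct request queue[] = { {1}, {1}, {2}, {2}, {2} };
            const struct request *last = NULL;
            int port = 0, workitems = 0, submit = 0;
            size_t i;

            for (i = 0; i < sizeof(queue) / sizeof(queue[0]); i++) {
                    if (last && !can_merge(last, &queue[i])) {
                            if (port == NUM_PORTS - 1)
                                    break; /* both ports busy, stop */
                            workitems++;   /* flush coalesced port */
                            port++;
                    }
                    last = &queue[i];
                    submit = 1;
            }
            if (submit)
                    workitems++; /* flush the final port */

            printf("%d requests -> %d workitems\n", (int)i, workitems);
            return 0;
    }
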
>
> v2: Also coalesce when replaying on reset (Daniele)
> v3: Consistent wq_resv - per-request (Daniele)
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Jeff McGee <jeff.mcgee at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
> ---
> drivers/gpu/drm/i915/i915_guc_submission.c | 72 +++++++++++++++---------------
> 1 file changed, 37 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index e6e0c6e..2a0c3161 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -491,14 +491,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
> * workqueue buffer dw by dw.
> */
> BUILD_BUG_ON(wqi_size != 16);
> - GEM_BUG_ON(client->wq_rsvd < wqi_size);
>
> /* postincrement WQ tail for next time */
> wq_off = client->wq_tail;
> GEM_BUG_ON(wq_off & (wqi_size - 1));
> client->wq_tail += wqi_size;
> client->wq_tail &= client->wq_size - 1;
> - client->wq_rsvd -= wqi_size;
>
> /* WQ starts from the page after doorbell / process_desc */
> wqi = client->vaddr + wq_off + GUC_DB_SIZE;
> @@ -580,7 +578,7 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
> }
>
> /**
> - * __i915_guc_submit() - Submit commands through GuC
> + * i915_guc_submit() - Submit commands through GuC
> * @rq: request associated with the commands
> *
> * The caller must have already called i915_guc_wq_reserve() above with
> @@ -594,7 +592,7 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
> * The only error here arises if the doorbell hardware isn't functioning
> * as expected, which really shouldn't happen.
> */
> -static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> +static void i915_guc_submit(struct drm_i915_gem_request *rq)
> {
> struct drm_i915_private *dev_priv = rq->i915;
> struct intel_engine_cs *engine = rq->engine;
> @@ -618,12 +616,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> spin_unlock_irqrestore(&client->wq_lock, flags);
> }
>
> -static void i915_guc_submit(struct drm_i915_gem_request *rq)
> -{
> - __i915_gem_request_submit(rq);
> - __i915_guc_submit(rq);
> -}
> -
> static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> {
> /* If we use dma_fence_enable_sw_signaling() directly, lockdep
> @@ -659,12 +651,15 @@ static void port_assign(struct execlist_port *port,
> nested_enable_signaling(rq);
> }
>
> -static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +static void i915_guc_dequeue(struct intel_engine_cs *engine)
> {
> struct execlist_port *port = engine->execlist_port;
> - struct drm_i915_gem_request *last = port_request(port);
> - struct rb_node *rb;
> + struct drm_i915_gem_request *last = NULL;
> bool submit = false;
> + struct rb_node *rb;
> +
> + if (port_request(port))
> + port++;
>
> spin_lock_irq(&engine->timeline->lock);
> rb = engine->execlist_first;
> @@ -681,18 +676,22 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> goto done;
> }
>
> - if (submit)
> + if (submit) {
> port_assign(port, last);
> + i915_guc_submit(last);
> + submit = false;
> + }
> port++;
> }
>
> INIT_LIST_HEAD(&rq->priotree.link);
> rq->priotree.priority = INT_MAX;
>
> - i915_guc_submit(rq);
> + __i915_gem_request_submit(rq);
> trace_i915_gem_request_in(rq, port_index(port, engine));
> last = rq;
> submit = true;
> + i915_guc_wq_unreserve(rq);
Bikeshed: doesn't moving the removal of the reservation to before the
actual WQ tail update theoretically open a window for a racing
over-reservation? It can't hurt in practice because of the coalescing,
but it might be worth squashing the next patch into this one to keep
things cleaner.
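
To make the window concrete, here's a standalone model of the
accounting (wq_model and these helpers are made up for illustration;
only the reserve/unreserve/append ordering mirrors the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Toy model of the client workqueue accounting; names loosely
     * mirror the driver, but this is a standalone illustration, not
     * kernel code. */
    struct wq_model {
            uint32_t size; /* total workqueue bytes */
            uint32_t rsvd; /* bytes promised to in-flight requests */
            uint32_t used; /* bytes actually written (head..tail) */
    };

    static int wq_reserve(struct wq_model *wq, uint32_t wqi_size)
    {
            if (wq->used + wq->rsvd + wqi_size > wq->size)
                    return -1; /* -EAGAIN in the real driver */
            wq->rsvd += wqi_size;
            return 0;
    }

    static void wq_unreserve(struct wq_model *wq, uint32_t wqi_size)
    {
            assert(wq->rsvd >= wqi_size);
            wq->rsvd -= wqi_size;
    }

    static void wq_append(struct wq_model *wq, uint32_t wqi_size)
    {
            wq->used += wqi_size; /* this is where the tail moves */
    }

    int main(void)
    {
            /* Buffer exactly full: 48 bytes written + 16 reserved. */
            struct wq_model wq = { .size = 64, .rsvd = 16, .used = 48 };
            int ret;

            /* Dequeue path: reservation dropped before the item lands. */
            wq_unreserve(&wq, 16);

            /* A concurrent reserve in this window sees 16 "free" bytes
             * and succeeds... */
            ret = wq_reserve(&wq, 16);
            assert(ret == 0);

            /* ...and once the deferred append happens, used + rsvd is
             * 80 > 64: we over-promised. Benign here thanks to the
             * coalescing, but the window exists. */
            wq_append(&wq, 16);
            assert(wq.used + wq.rsvd > wq.size);
            return 0;
    }
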
> }
>
> rb = rb_next(rb);
> @@ -703,11 +702,11 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> }
> done:
> engine->execlist_first = rb;
> - if (submit)
> + if (submit) {
> port_assign(port, last);
> + i915_guc_submit(last);
> + }
> spin_unlock_irq(&engine->timeline->lock);
> -
> - return submit;
> }
>
> static void i915_guc_irq_handler(unsigned long data)
> @@ -715,24 +714,20 @@ static void i915_guc_irq_handler(unsigned long data)
> struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> struct execlist_port *port = engine->execlist_port;
> struct drm_i915_gem_request *rq;
> - bool submit;
>
> - do {
> - rq = port_request(&port[0]);
> - while (rq && i915_gem_request_completed(rq)) {
> - trace_i915_gem_request_out(rq);
> - i915_gem_request_put(rq);
> + rq = port_request(&port[0]);
> + while (rq && i915_gem_request_completed(rq)) {
> + trace_i915_gem_request_out(rq);
> + i915_gem_request_put(rq);
>
> - port[0] = port[1];
> - memset(&port[1], 0, sizeof(port[1]));
> + port[0] = port[1];
> + memset(&port[1], 0, sizeof(port[1]));
>
> - rq = port_request(&port[0]);
> - }
> + rq = port_request(&port[0]);
> + }
>
> - submit = false;
> - if (!port_count(&port[1]))
> - submit = i915_guc_dequeue(engine);
> - } while (submit);
> + if (!port_isset(&port[1]))
> + i915_guc_dequeue(engine);
> }
>
> /*
> @@ -1250,6 +1245,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
> for_each_engine(engine, dev_priv, id) {
> const int wqi_size = sizeof(struct guc_wq_item);
> struct drm_i915_gem_request *rq;
> + struct execlist_port *port = engine->execlist_port;
> + int n;
>
> /* The tasklet was initialised by execlists, and may be in
> * a state of flux (across a reset) and so we just want to
> @@ -1261,11 +1258,16 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>
> /* Replay the current set of previously submitted requests */
> spin_lock_irq(&engine->timeline->lock);
> - list_for_each_entry(rq, &engine->timeline->requests, link) {
> + list_for_each_entry(rq, &engine->timeline->requests, link)
> guc_client_update_wq_rsvd(client, wqi_size);
Can we just drop this entirely given the changes in this version of
the patch? We no longer require wq_rsvd to be > 0 in i915_guc_submit,
and we're guaranteed to have space (the wq is empty and we're only
going to submit up to 2 items). Otherwise, aren't we going to leak
this rsvd space, since we no longer release it in i915_guc_submit?
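
i.e. (untested sketch on top of this patch, using only what's already
in the hunk below) the whole replay block would reduce to the port
loop:

    /* Replay previously submitted requests: no wq_rsvd bookkeeping
     * needed, since the fresh WQ is empty and coalescing caps us at
     * one workitem per port. */
    for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
            if (!port_isset(&port[n]))
                    break;

            i915_guc_submit(port_request(&port[n]));
    }
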
Thanks,
Daniele
> - __i915_guc_submit(rq);
> - }
> spin_unlock_irq(&engine->timeline->lock);
> +
> + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
> + if (!port_isset(&port[n]))
> + break;
> +
> + i915_guc_submit(port_request(&port[n]));
> + }
> }
>
> return 0;
>