[Intel-gfx] [PATCH 14/18] drm/i915/guc: Prepare for nonblocking execbuf submission
Dave Gordon
david.s.gordon at intel.com
Fri Sep 2 18:20:52 UTC 2016
On 30/08/16 09:18, Chris Wilson wrote:
> Currently the presumption is that the request construction and its
> submission to the GuC are all under the same holding of struct_mutex. We
> wish to relax this to separate the request construction and the later
> submission to the GuC. This requires us to reserve some space in the
> GuC command queue for the future submission. For flexibility to handle
> out-of-order request submission we do not preallocate the next slot in
> the GuC command queue during request construction, just ensuring that
> there is enough space later.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_guc_submission.c | 55 ++++++++++++++----------------
> drivers/gpu/drm/i915/intel_guc.h | 3 ++
> 2 files changed, 29 insertions(+), 29 deletions(-)
Hmm ... the functional changes look mostly OK, apart from some locking
issues, but there seems to be a great deal of unnecessary churn, such
as combining statements that had been kept separate for clarity :(
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 2332f9c98bdd..a047e61adc2a 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -434,20 +434,23 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
> {
> const size_t wqi_size = sizeof(struct guc_wq_item);
> struct i915_guc_client *gc = request->i915->guc.execbuf_client;
> - struct guc_process_desc *desc;
> + struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
> u32 freespace;
> + int ret;
>
> - GEM_BUG_ON(gc == NULL);
> -
> - desc = gc->client_base + gc->proc_desc_offset;
> -
> + spin_lock(&gc->lock);
> freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
> - if (likely(freespace >= wqi_size))
> - return 0;
> -
> - gc->no_wq_space += 1;
> + freespace -= gc->wq_rsvd;
> + if (likely(freespace >= wqi_size)) {
> + gc->wq_rsvd += wqi_size;
> + ret = 0;
> + } else {
> + gc->no_wq_space++;
> + ret = -EAGAIN;
> + }
> + spin_unlock(&gc->lock);
>
> - return -EAGAIN;
> + return ret;
> }
>
> static void guc_add_workqueue_item(struct i915_guc_client *gc,
> @@ -457,22 +460,9 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
> const size_t wqi_size = sizeof(struct guc_wq_item);
> const u32 wqi_len = wqi_size/sizeof(u32) - 1;
> struct intel_engine_cs *engine = rq->engine;
> - struct guc_process_desc *desc;
> struct guc_wq_item *wqi;
> void *base;
> - u32 freespace, tail, wq_off, wq_page;
> -
> - desc = gc->client_base + gc->proc_desc_offset;
> -
> - /* Free space is guaranteed, see i915_guc_wq_check_space() above */
This comment seems to have been lost. It still applies (mutatis
mutandis), so it should be relocated to some part of the new version;
I suggest a spot below ...
> - freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
> - GEM_BUG_ON(freespace < wqi_size);
> -
> - /* The GuC firmware wants the tail index in QWords, not bytes */
> - tail = rq->tail;
> - GEM_BUG_ON(tail & 7);
> - tail >>= 3;
> - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
This *commented* sequence of statements seems clearer than the
replacement below ...
> + u32 wq_off, wq_page;
>
> /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
> * should not have the case where structure wqi is across page, neither
> @@ -482,18 +472,19 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
> * workqueue buffer dw by dw.
> */
> BUILD_BUG_ON(wqi_size != 16);
This would be a good place to note that:
/* Reserved space is guaranteed, see i915_guc_wq_check_space() above */
> + GEM_BUG_ON(gc->wq_rsvd < wqi_size);
>
> /* postincrement WQ tail for next time */
> wq_off = gc->wq_tail;
> + GEM_BUG_ON(wq_off & (wqi_size - 1));
> gc->wq_tail += wqi_size;
> gc->wq_tail &= gc->wq_size - 1;
> - GEM_BUG_ON(wq_off & (wqi_size - 1));
> + gc->wq_rsvd -= wqi_size;
>
> /* WQ starts from the page after doorbell / process_desc */
> wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
> - wq_off &= PAGE_SIZE - 1;
> base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page));
> - wqi = (struct guc_wq_item *)((char *)base + wq_off);
> + wqi = (struct guc_wq_item *)((char *)base + offset_in_page(wq_off));
>
> /* Now fill in the 4-word work queue item */
> wqi->header = WQ_TYPE_INORDER |
> @@ -504,7 +495,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
> /* The GuC wants only the low-order word of the context descriptor */
> wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
>
> - wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
> + wqi->ring_tail = rq->tail >> 3 << WQ_RING_TAIL_SHIFT;
This line is particularly ugly; I think it's the >> chevron << effect.
Parenthesisation would help, but it would read better as separate
statements, e.g. the sketch below. Also, there's no explanation of the
"3" here, unlike in the original version above.
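Something along these lines would keep the original clarity (just a
sketch, reusing the old local 'tail' and the same GEM_BUG_ON checks
from the deleted code above):

	u32 tail;

	/* The GuC firmware wants the tail index in QWords, not bytes */
	tail = rq->tail;
	GEM_BUG_ON(tail & 7);
	tail >>= 3;
	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);

	/* ... and later, when filling in the work queue item ... */
	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;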
> wqi->fence_id = rq->fence.seqno;
>
> kunmap_atomic(base);
> @@ -591,8 +582,10 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
> struct i915_guc_client *client = guc->execbuf_client;
> int b_ret;
>
> + spin_lock(&client->lock);
> guc_add_workqueue_item(client, rq);
> b_ret = guc_ring_doorbell(client);
> + spin_unlock(&client->lock);
>
> client->submissions[engine_id] += 1;
Outside the spinlock? Do we still have the BKL (struct_mutex) during
submit(), just not during i915_guc_wq_check_space()? If so, then
guc_ring_doorbell() doesn't strictly need to be inside the spinlock
(or the lock could be taken inside guc_add_workqueue_item()); but if
not, then the update of submissions[] should be inside too, something
like the sketch below.
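A minimal sketch, assuming struct_mutex is no longer held here (names
as in the patch):

	spin_lock(&client->lock);
	guc_add_workqueue_item(client, rq);
	b_ret = guc_ring_doorbell(client);
	client->submissions[engine_id] += 1;
	client->retcode = b_ret;
	spin_unlock(&client->lock);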
> client->retcode = b_ret;
> @@ -770,6 +763,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
> if (!client)
> return NULL;
>
> + spin_lock_init(&client->lock);
> +
> client->owner = ctx;
> client->guc = guc;
> client->engines = engines;
> @@ -1019,9 +1014,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
> engine->submit_request = i915_guc_submit;
>
> /* Replay the current set of previously submitted requests */
> - list_for_each_entry(request, &engine->request_list, link)
> + list_for_each_entry(request, &engine->request_list, link) {
> + client->wq_rsvd += sizeof(struct guc_wq_item);
Presumably this is being called in some context that ensures we don't
need to hold the spinlock while updating wq_rsvd? Maybe that should be
mentioned ... and if there's no such guarantee, the update should be
done under the lock, as sketched below.
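A minimal sketch, assuming no other serialisation applies here:

	list_for_each_entry(request, &engine->request_list, link) {
		spin_lock(&client->lock);
		client->wq_rsvd += sizeof(struct guc_wq_item);
		spin_unlock(&client->lock);
		if (i915_sw_fence_done(&request->submit))
			i915_guc_submit(request);
	}

(dropping the lock before i915_guc_submit(), since that takes
client->lock itself in this patch).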
> if (i915_sw_fence_done(&request->submit))
> i915_guc_submit(request);
> + }
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index c97326269588..27a622824b54 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -68,6 +68,8 @@ struct i915_guc_client {
> struct i915_gem_context *owner;
> struct intel_guc *guc;
>
> + spinlock_t lock;
It would be helpful if this lock were annotated with a list of the
things it protects. AFAICT it might be:
	wq_tail
	wq_rsvd
	no_wq_space
	cookie
b_fail and the submissions[] statistics seem to have been left
unprotected :( though of course they're not really critical. Something
like the annotation sketched below would do.
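E.g. (just a suggestion for the comment, reflecting my reading above):

	spinlock_t lock;	/* protects wq_tail, wq_rsvd, no_wq_space
				 * (and should probably cover cookie,
				 * b_fail and submissions[] too) */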
.Dave.
> +
> uint32_t engines; /* bitmap of (host) engine ids */
> uint32_t priority;
> uint32_t ctx_index;
> @@ -81,6 +83,7 @@ struct i915_guc_client {
> uint32_t wq_offset;
> uint32_t wq_size;
> uint32_t wq_tail;
> + uint32_t wq_rsvd;
> uint32_t no_wq_space;
> uint32_t b_fail;
> int retcode;