[Intel-gfx] [PATCH 3/4] drm/i915/guc: Make adding GuC work items lockless
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 12 13:40:51 UTC 2017
Quoting Michał Winiarski (2017-09-12 13:47:25)
> @@ -416,15 +416,15 @@ static void guc_wq_item_append(struct i915_guc_client *client,
> struct intel_engine_cs *engine = rq->engine;
> struct guc_process_desc *desc = __get_process_desc(client);
> struct guc_wq_item *wqi;
> - u32 freespace, tail, wq_off;
> + u32 freespace, ring_tail, wq_off, wq_next;
>
> /* Free space is guaranteed */
> - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
> + freespace = CIRC_SPACE(desc->tail, desc->head, GUC_WQ_SIZE);
> GEM_BUG_ON(freespace < wqi_size);
Fwiw, I would move this to the cmpxchg loop.
GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), GUC_WQ_SIZE) < wqi_size);
>
> /* The GuC firmware wants the tail index in QWords, not bytes */
> - tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
> - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
> + ring_tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
> + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
>
> /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
> * should not have the case where structure wqi is across page, neither
> @@ -435,11 +435,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
> */
> BUILD_BUG_ON(wqi_size != 16);
>
> - /* postincrement WQ tail for next time */
> - wq_off = client->wq_tail;
> + /* Find our offset and postincrement WQ tail for next time */
> + do {
> + wq_off = desc->tail;
wq_off = READ_ONCE(desc->tail);
> + wq_next = (wq_off + wqi_size) & (GUC_WQ_SIZE - 1);
> + } while (cmpxchg(&desc->tail, wq_off, wq_next) != wq_off);
> GEM_BUG_ON(wq_off & (wqi_size - 1));
> - client->wq_tail += wqi_size;
> - client->wq_tail &= client->wq_size - 1;
>
> /* WQ starts from the page after doorbell / process_desc */
> wqi = client->vaddr + wq_off + GUC_DB_SIZE;
> @@ -453,7 +454,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
> /* The GuC wants only the low-order word of the context descriptor */
> wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
>
> - wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT;
> + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
> wqi->fence_id = rq->global_seqno;
> }
>
> @@ -463,20 +464,14 @@ static void guc_reset_wq(struct i915_guc_client *client)
>
> desc->head = 0;
> desc->tail = 0;
> -
> - client->wq_tail = 0;
> }
>
> static int guc_ring_doorbell(struct i915_guc_client *client)
> {
> - struct guc_process_desc *desc = __get_process_desc(client);
> union guc_doorbell_qw db_cmp, db_exc, db_ret;
> union guc_doorbell_qw *db;
> int attempt = 2, ret = -EAGAIN;
>
> - /* Update the tail so it is visible to GuC */
> - desc->tail = client->wq_tail;
> -
> /* current cookie */
> db_cmp.db_status = GUC_DOORBELL_ENABLED;
> db_cmp.cookie = client->doorbell_cookie;
> @@ -535,7 +530,6 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
> struct execlist_port *port = engine->execlist_port;
> unsigned int engine_id = engine->id;
> unsigned int n;
> - unsigned long flags;
>
> for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
> struct drm_i915_gem_request *rq;
> @@ -548,14 +542,10 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
> if (i915_vma_is_map_and_fenceable(rq->ring->vma))
> POSTING_READ_FW(GUC_STATUS);
>
> - spin_lock_irqsave(&client->wq_lock, flags);
> -
> guc_wq_item_append(client, rq);
> WARN_ON(guc_ring_doorbell(client));
>
> client->submissions[engine_id] += 1;
Per-engine, so this is actually serialized by the tasklet. Hmm, double
accounting after reset. But do I care? I consider it to be pointless
since we are counting at the wrong boundary.
I think we need the READ_ONCE to be clear to both the compiler and
ourselves that we are reading transient values shared with the guc. But
since that's the only issue I could see,
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris
More information about the Intel-gfx
mailing list