[Intel-gfx] [PATCH 4/5] drm/i915/guc: Make adding GuC work items lockless
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 12 15:37:30 UTC 2017
From: Michał Winiarski <michal.winiarski at intel.com>
We can get rid of a spinlock by updating the tail directly using
cmpxchg. We can also put guc client on a diet by removing some constants
from the struct.
This causes a small change in one of GuC debugfs files.
We're no longer reporting constant values (which I don't think is a
problem), but we're also no longer reporting the tail (does anyone care?).
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Oscar Mateo <oscar.mateo at intel.com>
Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20170912124726.19689-3-michal.winiarski@intel.com
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 --
drivers/gpu/drm/i915/i915_guc_submission.c | 39 ++++++++++--------------------
drivers/gpu/drm/i915/intel_uc.h | 5 ----
3 files changed, 13 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 40a315fc0e31..0bd5cacd288b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2447,8 +2447,6 @@ static void i915_guc_client_info(struct seq_file *m,
client->priority, client->stage_id, client->proc_desc_offset);
seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n",
client->doorbell_id, client->doorbell_offset);
- seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
- client->wq_size, client->wq_offset, client->wq_tail);
for_each_engine(engine, dev_priv, id) {
u64 submissions = client->submissions[id];
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index d6f3a1837a2d..f4da655c7275 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -305,7 +305,7 @@ static void guc_proc_desc_init(struct intel_guc *guc,
desc->db_base_addr = 0;
desc->stage_id = client->stage_id;
- desc->wq_size_bytes = client->wq_size;
+ desc->wq_size_bytes = GUC_WQ_SIZE;
desc->wq_status = WQ_STATUS_ACTIVE;
desc->priority = client->priority;
}
@@ -390,8 +390,8 @@ static void guc_stage_desc_init(struct intel_guc *guc,
desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client);
desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
desc->process_desc = gfx_addr + client->proc_desc_offset;
- desc->wq_addr = gfx_addr + client->wq_offset;
- desc->wq_size = client->wq_size;
+ desc->wq_addr = gfx_addr + GUC_DB_SIZE;
+ desc->wq_size = GUC_WQ_SIZE;
desc->desc_private = (uintptr_t)client;
}
@@ -415,15 +415,15 @@ static void guc_wq_item_append(struct i915_guc_client *client,
struct intel_engine_cs *engine = rq->engine;
struct guc_process_desc *desc = __get_process_desc(client);
struct guc_wq_item *wqi;
- u32 freespace, tail, wq_off;
+ u32 freespace, ring_tail, wq_off, wq_next;
/* Free space is guaranteed */
- freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
+ freespace = CIRC_SPACE(desc->tail, desc->head, GUC_WQ_SIZE);
GEM_BUG_ON(freespace < wqi_size);
/* The GuC firmware wants the tail index in QWords, not bytes */
- tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
- GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
+ ring_tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
+ GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
* should not have the case where structure wqi is across page, neither
@@ -434,11 +434,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
*/
BUILD_BUG_ON(wqi_size != 16);
- /* postincrement WQ tail for next time */
- wq_off = client->wq_tail;
+ /* Find our offset and postincrement WQ tail for next time */
+ do {
+ wq_off = desc->tail;
+ wq_next = (wq_off + wqi_size) & (GUC_WQ_SIZE - 1);
+ } while (cmpxchg(&desc->tail, wq_off, wq_next) != wq_off);
GEM_BUG_ON(wq_off & (wqi_size - 1));
- client->wq_tail += wqi_size;
- client->wq_tail &= client->wq_size - 1;
/* WQ starts from the page after doorbell / process_desc */
wqi = client->vaddr + wq_off + GUC_DB_SIZE;
@@ -452,7 +453,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
/* The GuC wants only the low-order word of the context descriptor */
wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
- wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT;
+ wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
wqi->fence_id = rq->global_seqno;
}
@@ -462,19 +463,13 @@ static void guc_reset_wq(struct i915_guc_client *client)
desc->head = 0;
desc->tail = 0;
-
- client->wq_tail = 0;
}
static void guc_ring_doorbell(struct i915_guc_client *client)
{
- struct guc_process_desc *desc = __get_process_desc(client);
struct guc_doorbell_info *db = __get_doorbell(client);
u32 cookie;
- /* Update the tail so it is visible to GuC */
- desc->tail = client->wq_tail;
-
do {
cookie = READ_ONCE(db->cookie);
} while (cmpxchg(&db->cookie, cookie, cookie + 1) != cookie);
@@ -496,7 +491,6 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
struct execlist_port *port = engine->execlist_port;
unsigned int engine_id = engine->id;
unsigned int n;
- unsigned long flags;
for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
struct drm_i915_gem_request *rq;
@@ -509,14 +503,10 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
if (i915_vma_is_map_and_fenceable(rq->ring->vma))
POSTING_READ_FW(GUC_STATUS);
- spin_lock_irqsave(&client->wq_lock, flags);
-
guc_wq_item_append(client, rq);
guc_ring_doorbell(client);
client->submissions[engine_id] += 1;
-
- spin_unlock_irqrestore(&client->wq_lock, flags);
}
}
}
@@ -809,9 +799,6 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
client->engines = engines;
client->priority = priority;
client->doorbell_id = GUC_DOORBELL_INVALID;
- client->wq_offset = GUC_DB_SIZE;
- client->wq_size = GUC_WQ_SIZE;
- spin_lock_init(&client->wq_lock);
ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index f505dcccd613..386e0bcd0cc3 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -67,11 +67,6 @@ struct i915_guc_client {
u16 doorbell_id;
unsigned long doorbell_offset;
- spinlock_t wq_lock;
- uint32_t wq_offset;
- uint32_t wq_size;
- uint32_t wq_tail;
-
/* Per-engine counts of GuC submissions */
uint64_t submissions[I915_NUM_ENGINES];
};
--
2.14.1
More information about the Intel-gfx
mailing list