[Intel-gfx] [PATCH v2 13/18] drm/i915: Enable commands submission via GuC
yu.dai at intel.com
yu.dai at intel.com
Fri Apr 3 11:08:39 PDT 2015
From: Alex Dai <yu.dai at intel.com>
Add functions to submit a work queue item and to ring the doorbell.
The GuC TLB needs to be invalidated when the LRC context changes.
Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai at intel.com>
---
drivers/gpu/drm/i915/intel_guc.h | 3 +
drivers/gpu/drm/i915/intel_guc_client.c | 182 ++++++++++++++++++++++++++++-
drivers/gpu/drm/i915/intel_guc_scheduler.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 16 ++-
4 files changed, 200 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 4d9fb34..c44b3c0 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -31,6 +31,7 @@
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
struct i915_guc_client {
+ spinlock_t wq_lock;
struct drm_i915_gem_object *client_obj;
u32 priority;
off_t doorbell_offset;
@@ -39,6 +40,8 @@ struct i915_guc_client {
uint16_t doorbell_id;
uint32_t ctx_index;
uint32_t wq_size;
+ uint32_t wq_tail;
+ uint32_t cookie;
};
#define I915_MAX_DOORBELLS 256
diff --git a/drivers/gpu/drm/i915/intel_guc_client.c b/drivers/gpu/drm/i915/intel_guc_client.c
index ae6323a..d64cad6 100644
--- a/drivers/gpu/drm/i915/intel_guc_client.c
+++ b/drivers/gpu/drm/i915/intel_guc_client.c
@@ -22,6 +22,7 @@
*
*/
#include <linux/firmware.h>
+#include <linux/circ_buf.h>
#include "i915_drv.h"
#include "intel_guc.h"
@@ -52,6 +53,14 @@
* Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
* mapped into process space.
*
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC; it
+ * represents an in-order queue. The kernel driver packs the ring tail pointer and an
+ * ELSP context descriptor dword into Work Item.
+ * See add_workqueue_item()
+ *
*/
/*
@@ -411,6 +420,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority)
/* XXX: evict a doorbell instead */
goto err;
+ spin_lock_init(&client->wq_lock);
+
init_ctx_desc(guc, client);
init_proc_desc(guc, client);
init_doorbell(guc, client);
@@ -430,6 +441,167 @@ err:
return NULL;
}
+/* Get valid workqueue item and return it back to offset */
+static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+{
+ struct guc_process_desc *desc;
+ void *base;
+ u32 size = sizeof(struct guc_wq_item);
+ int ret = 0, timeout_counter = 200;
+ unsigned long flags;
+
+ base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+ desc = base + gc->proc_desc_offset;
+
+ while (timeout_counter-- > 0) {
+ spin_lock_irqsave(&gc->wq_lock, flags);
+
+ ret = wait_for(CIRC_SPACE(gc->wq_tail, desc->head,
+ gc->wq_size) >= size, 1);
+
+ if (!ret) {
+ *offset = gc->wq_tail;
+
+ /* advance the tail for next workqueue item */
+ gc->wq_tail += size;
+ gc->wq_tail &= gc->wq_size - 1;
+
+ /* this will break the loop */
+ timeout_counter = 0;
+ }
+
+ spin_unlock_irqrestore(&gc->wq_lock, flags);
+ };
+
+ kunmap_atomic(base);
+
+ return ret;
+}
+
+
+static int add_workqueue_item(struct i915_guc_client *gc,
+ struct intel_context *ctx,
+ struct intel_engine_cs *ring)
+{
+ struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_gem_object *ctx_obj;
+ struct guc_wq_item *wqi;
+ void *base;
+ struct page *page;
+ u32 wq_off = 0, tail = ringbuf->tail, wq_len;
+ int ret;
+
+ ctx_obj = ctx->engine[ring->id].state;
+
+ WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
+ WARN_ON(!i915_gem_obj_is_pinned(ringbuf->obj));
+
+ /* Need this because of the deferred pin ctx and ring */
+ /* Shall we move this right after ring is pinned? */
+ page = i915_gem_object_get_page(ctx_obj, 1);
+ base = kmap_atomic(page);
+
+ *((u32 *)base + CTX_RING_BUFFER_START + 1) =
+ i915_gem_obj_ggtt_offset(ringbuf->obj);
+
+ kunmap_atomic(base);
+
+ ret = get_workqueue_space(gc, &wq_off);
+ if (ret)
+ return ret;
+
+ /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
+ * should not have the case where structure wqi is across page, neither
+ * wrapped to the beginning. This simplifies the implementation below.
+ *
+ * XXX: if that is not the case, we need to save the data to a temp wqi and copy
+ * it to the workqueue buffer dw by dw.
+ */
+ WARN_ON(sizeof(struct guc_wq_item) != 16);
+ WARN_ON(wq_off & 3);
+
+ /* wq starts from the page after doorbell / process_desc */
+ base = kmap_atomic(i915_gem_object_get_page(gc->client_obj,
+ (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT));
+ wq_off &= PAGE_SIZE - 1;
+ wqi = (struct guc_wq_item *)((char *)base + wq_off);
+
+ /* len does not include the header */
+ wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
+ wqi->header = WQ_TYPE_INORDER |
+ (wq_len << WQ_LEN_SHIFT) |
+ (ring->id << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+
+ wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj);
+ /* tail index is in qw */
+ tail >>= 3;
+ wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+ wqi->fence_id = 0; /*XXX: what fence to be here */
+
+ kunmap_atomic(base);
+
+ return 0;
+}
+
+static int ring_doorbell(struct i915_guc_client *gc)
+{
+ struct guc_process_desc *desc;
+ union guc_doorbell_qw db_cmp, db_exc, db_ret;
+ union guc_doorbell_qw *db;
+ void *base;
+ int attempt = 2, ret = -EAGAIN;
+
+ base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
+ desc = base + gc->proc_desc_offset;
+
+ /* Update the tail so it is visible to GuC */
+ desc->tail = gc->wq_tail;
+
+ /* current cookie */
+ db_cmp.db_status = GUC_DOORBELL_ENABLED;
+ db_cmp.cookie = gc->cookie;
+
+ /* cookie to be updated */
+ db_exc.db_status = GUC_DOORBELL_ENABLED;
+ db_exc.cookie = gc->cookie + 1;
+ if (db_exc.cookie == 0)
+ db_exc.cookie = 1;
+
+ /* pointer of current doorbell cacheline */
+ db = base + gc->doorbell_offset;
+
+ while (attempt--) {
+ /* lets ring the doorbell */
+ db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+ db_cmp.value_qw, db_exc.value_qw);
+
+ /* if the exchange was successfully executed */
+ if (db_ret.value_qw == db_cmp.value_qw) {
+ /* db was successfully rung */
+ gc->cookie = db_exc.cookie;
+ ret = 0;
+ break;
+ }
+
+ /* XXX: the doorbell was lost and needs to be acquired again */
+ if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+ break;
+
+ DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
+ db_cmp.cookie, db_ret.cookie);
+
+ /* update the cookie to newly read cookie from GuC */
+ db_cmp.cookie = db_ret.cookie;
+ db_exc.cookie = db_ret.cookie + 1;
+ if (db_exc.cookie == 0)
+ db_exc.cookie = 1;
+ }
+
+ kunmap_atomic(base);
+ return ret;
+}
+
/**
* i915_guc_client_submit() - Submit commands through GuC
* @client: the guc client where commands will go through
@@ -442,5 +614,13 @@ int i915_guc_client_submit(struct i915_guc_client *client,
struct intel_context *ctx,
struct intel_engine_cs *ring)
{
- return 0;
+ int ret;
+
+ ret = add_workqueue_item(client, ctx, ring);
+ if (ret)
+ return ret;
+
+ ret = ring_doorbell(client);
+
+ return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c
index 008f74c..c0b7231 100644
--- a/drivers/gpu/drm/i915/intel_guc_scheduler.c
+++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c
@@ -160,6 +160,6 @@ bool sanitize_enable_guc_scheduling(struct drm_device *dev)
if (!HAS_GUC_UCODE(dev) || !HAS_GUC_SCHED(dev))
return false;
- return i915.enable_guc_scheduling;
+ return i915.enable_execlists && i915.enable_guc_scheduling;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d4011b4..cb5a617 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -618,13 +618,17 @@ intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
struct drm_i915_gem_request *request)
{
struct intel_engine_cs *ring = ringbuf->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
intel_logical_ring_advance(ringbuf);
if (intel_ring_stopped(ring))
return;
- execlists_context_queue(ring, ctx, ringbuf->tail, request);
+ if (dev_priv->guc.execbuf_client)
+ i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring);
+ else
+ execlists_context_queue(ring, ctx, ringbuf->tail, request);
}
static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
@@ -941,6 +945,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret = 0;
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -953,6 +958,10 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
if (ret)
goto unpin_ctx_obj;
+
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
return ret;
@@ -1284,8 +1293,13 @@ out:
static int gen8_init_rcs_context(struct intel_engine_cs *ring,
struct intel_context *ctx)
{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret;
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_scheduling)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
ret = intel_logical_ring_workarounds_emit(ring, ctx);
if (ret)
return ret;
--
1.9.1
More information about the Intel-gfx
mailing list