[PATCH 10/20] drm/i915/guc: New GuC stage descriptors

Michal Wajdeczko michal.wajdeczko at intel.com
Tue Aug 21 14:05:25 UTC 2018


New GuC stage descriptor stores information about all possible HW contexts
that use it. The idea is that every direct-submission GuC client gets one
SW Context ID and every HW context created by that client gets one SW
Counter (up to 64 entries). The correct SW Context ID and SW Counter now
get passed on every work queue item.

Signed-off-by: Michel Thierry <michel.thierry at intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Michel Thierry <michel.thierry at intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tomasz Lis <tomasz.lis at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c         |   9 +-
 drivers/gpu/drm/i915/i915_utils.h           |  12 ++
 drivers/gpu/drm/i915/intel_guc_fwif.h       |  67 ++++-----
 drivers/gpu/drm/i915/intel_guc_submission.c | 215 +++++++++++++++++++---------
 4 files changed, 200 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 26b7e52..5f90284 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2455,11 +2455,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 
 		seq_printf(m, "GuC stage descriptor %u:\n", index);
 		seq_printf(m, "\tIndex: %u\n", desc->stage_id);
+		seq_printf(m, "\tProxy Index: %u\n", desc->proxy_id);
 		seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
 		seq_printf(m, "\tPriority: %d\n", desc->priority);
 		seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
-		seq_printf(m, "\tEngines used: 0x%x\n",
-			   desc->engines_used);
 		seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
 			   desc->db_trigger_phy,
 			   desc->db_trigger_cpu,
@@ -2471,10 +2470,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 
 		for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-			u32 guc_engine_id = engine->guc_id;
+			u32 class = GUC_ID_TO_ENGINE_CLASS(engine->guc_id);
+			u32 inst = GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id);
 			struct guc_execlist_context *lrc =
-						&desc->lrc[guc_engine_id];
-
+						&desc->lrc[class][inst];
 			seq_printf(m, "\t%s LRC:\n", engine->name);
 			seq_printf(m, "\t\tContext desc: 0x%x\n",
 				   lrc->context_desc);
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 00165ad..3968a51 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -118,6 +118,18 @@ static inline u64 ptr_to_u64(const void *ptr)
 	__idx;								\
 })
 
+/*
+ * Bit helpers for bitmaps kept as u32 arrays. The mask is built from 1U so
+ * that bit 31 never shifts into the sign bit of an int (undefined behaviour).
+ * @bit is evaluated twice: do not pass an expression with side effects.
+ */
+#define bitmap32_test_bit(bitmap, bit) \
+	(!!((bitmap)[(bit) / 32] & (1U << ((bit) % 32))))
+#define bitmap32_set_bit(bitmap, bit) \
+	((bitmap)[(bit) / 32] |= (1U << ((bit) % 32)))
+#define bitmap32_clear_bit(bitmap, bit) \
+	((bitmap)[(bit) / 32] &= ~(1U << ((bit) % 32)))
+
 #include <linux/list.h>
 
 static inline int list_is_first(const struct list_head *list,
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 0616d70..ee9efa5 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -29,9 +29,11 @@
 #define GUC_CLIENT_PRIORITY_NORMAL	3
 #define GUC_CLIENT_PRIORITY_NUM		4
 
-#define GUC_MAX_STAGE_DESCRIPTORS	1024
+#define GUC_MAX_STAGE_DESCRIPTORS	2032
 #define	GUC_INVALID_STAGE_ID		GUC_MAX_STAGE_DESCRIPTORS
 
+#define GUC_MAX_LRC_PER_CLASS		64
+
 #define GUC_RENDER_ENGINE		0
 #define GUC_VIDEO_ENGINE		1
 #define GUC_BLITTER_ENGINE		2
@@ -70,9 +72,12 @@
 #define GUC_DOORBELL_DISABLED		0
 
 #define GUC_STAGE_DESC_ATTR_ACTIVE	BIT(0)
-#define GUC_STAGE_DESC_ATTR_PENDING_DB	BIT(1)
-#define GUC_STAGE_DESC_ATTR_KERNEL	BIT(2)
-#define GUC_STAGE_DESC_ATTR_PREEMPT	BIT(3)
+#define GUC_STAGE_DESC_ATTR_TYPE_SHIFT	1	/* 2-bit type field, bits 2:1 */
+#define GUC_STAGE_DESC_ATTR_PRINCIPAL	(0x0 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_PROXY	(0x1 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_REAL	(0x2 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_TYPE_MASK	(0x3 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_KERNEL	BIT(3)
 #define GUC_STAGE_DESC_ATTR_RESET	BIT(4)
 #define GUC_STAGE_DESC_ATTR_WQLOCKED	BIT(5)
 #define GUC_STAGE_DESC_ATTR_PCH		BIT(6)
@@ -281,9 +286,10 @@ struct guc_process_desc {
 	u64 wq_base_addr;
 	u32 wq_size_bytes;
 	u32 wq_status;
-	u32 engine_presence;
 	u32 priority;
-	u32 reserved[30];
+	u32 token;
+	u32 queue_engine_error;
+	u32 reserved[23];
 } __packed;
 
 /* engine id and context id is packed into guc_execlist_context.context_id*/
@@ -294,16 +300,19 @@ struct guc_process_desc {
 struct guc_execlist_context {
 	u32 context_desc;
 	u32 context_id;
-	u32 ring_status;
+	u32 reserved0;
 	u32 ring_lrca;
 	u32 ring_begin;
 	u32 ring_end;
 	u32 ring_next_free_location;
 	u32 ring_current_tail_pointer_value;
-	u8 engine_state_submit_value;
-	u8 engine_state_wait_value;
-	u16 pagefault_count;
-	u16 engine_submit_queue_count;
+	u32 engine_state_wait_value;
+	u32 state_reserved;
+	u32 is_present_in_sq;
+	u32 sync_value;
+	u32 sync_addr;
+	u32 slpc_hints;
+	u32 reserved1[4];
 } __packed;
 
 /*
@@ -316,36 +325,30 @@ struct guc_execlist_context {
  * with the GuC, being allocated before the GuC is loaded with its firmware.
  */
 struct guc_stage_desc {
-	u32 sched_common_area;
+	u64 desc_private;
 	u32 stage_id;
-	u32 pas_id;
-	u8 engines_used;
+	u32 proxy_id;
 	u64 db_trigger_cpu;
 	u32 db_trigger_uk;
 	u64 db_trigger_phy;
-	u16 db_id;
-
-	struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
-
-	u8 attribute;
-
-	u32 priority;
-
+	u32 db_id;
+	struct guc_execlist_context lrc[GUC_MAX_ENGINE_CLASSES][GUC_MAX_LRC_PER_CLASS];
+	u32 lrc_bitmap[GUC_MAX_ENGINE_CLASSES][3];
+	u32 lrc_count;
+	u32 max_lrc_per_class;
+	u32 attribute; /* GUC_STAGE_DESC_ATTR_xxx */
+	u32 priority; /* GUC_CLIENT_PRIORITY_xxx */
 	u32 wq_sampled_tail_offset;
 	u32 wq_total_submit_enqueues;
-
 	u32 process_desc;
 	u32 wq_addr;
 	u32 wq_size;
-
-	u32 engine_presence;
-
-	u8 engine_suspended;
-
-	u8 reserved0[3];
-	u64 reserved1[1];
-
-	u64 desc_private;
+	u32 feature0;
+	u32 feature1;
+	u32 feature2;
+	u32 queue_engine_error;
+	u32 reserved[2];
+	u64 reserved3[12];
 } __packed;
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 195adbd..6d8814f 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -46,17 +46,22 @@
  * that contains all required pages for these elements).
  *
  * GuC stage descriptor:
- * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * During initialization, the driver allocates a static pool of descriptors
+ * and shares them with the GuC. A stage descriptor lets the GuC know about
+ * the doorbell, workqueue and process descriptor. Additionally, it stores
+ * information about all possible HW contexts that use it (one
+ * guc_execlist_context struct for each of 64 slots per engine class).
+ *
+ * The idea is that every direct-submission GuC client gets one SW Context ID
+ * and every HW context created by that client gets one SW Counter. The "SW
+ * Context ID" and "SW Counter" to use now get passed on every work queue item.
+ *
+ * But we don't have direct submission yet: does that mean we are limited to 64
+ * contexts in total (one client)? Not really: we can use extra GuC stage
+ * descriptors to store more HW contexts. They are special in that they don't
+ * have their own work queue, doorbell or process descriptor. Instead, these
+ * "principal" descriptors use the client's own descriptor as a "proxy" for
+ * submission (a generalization of the old proxy submission).
  *
  * The Scratch registers:
  * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -171,6 +176,16 @@ static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
 	return &base[client->stage_id];
 }
 
+/* Return the stage descriptor at @index of the pool shared with the GuC */
+static struct guc_stage_desc *__get_ppal_stage_desc(struct intel_guc *guc,
+						    u32 index)
+{
+	struct guc_stage_desc *pool = guc->stage_desc_pool_vaddr;
+
+	GEM_BUG_ON(index >= GUC_MAX_STAGE_DESCRIPTORS);
+	return pool + index;
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -344,70 +359,20 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 static void guc_stage_desc_init(struct intel_guc *guc,
 				struct intel_guc_client *client)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx = client->owner;
 	struct guc_stage_desc *desc;
-	unsigned int tmp;
 	u32 gfx_addr;
 
 	desc = __get_stage_desc(client);
 	memset(desc, 0, sizeof(*desc));
 
 	desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+			  GUC_STAGE_DESC_ATTR_PROXY |
 			  GUC_STAGE_DESC_ATTR_KERNEL;
-	if (is_high_priority(client))
-		desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
 	desc->stage_id = client->stage_id;
+	desc->proxy_id = client->stage_id;
 	desc->priority = client->priority;
 	desc->db_id = client->doorbell_id;
 
-	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
-		u32 guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
-
-		/* TODO: We have a design issue to be solved here. Only when we
-		 * receive the first batch, we know which engine is used by the
-		 * user. But here GuC expects the lrc and ring to be pinned. It
-		 * is not an issue for default context, which is the only one
-		 * for now who owns a GuC client. But for future owner of GuC
-		 * client, need to make sure lrc is pinned prior to enter here.
-		 */
-		if (!ce->state)
-			break;	/* XXX: continue? */
-
-		/*
-		 * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy
-		 * submission or, in other words, not using a direct submission
-		 * model) the KMD's LRCA is not used for any work submission.
-		 * Instead, the GuC uses the LRCA of the user mode context (see
-		 * guc_add_request below).
-		 */
-		lrc->context_desc = lower_32_bits(ce->lrc_desc);
-
-		/* The state page is after PPHWSP */
-		lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) +
-				 LRC_STATE_PN * PAGE_SIZE;
-
-		/* XXX: In direct submission, the GuC wants the HW context id
-		 * here. In proxy submission, it wants the stage id
-		 */
-		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
-				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
-
-		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
-		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
-		lrc->ring_next_free_location = lrc->ring_begin;
-		lrc->ring_current_tail_pointer_value = 0;
-
-		desc->engines_used |= (1 << guc_engine_id);
-	}
-
-	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
-			 client->engines, desc->engines_used);
-	WARN_ON(desc->engines_used == 0);
-
 	/*
 	 * The doorbell, process descriptor, and workqueue are all parts
 	 * of the client object, which the GuC will reference via the GGTT
@@ -430,7 +395,15 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
 	struct guc_stage_desc *desc;
 
 	desc = __get_stage_desc(client);
-	memset(desc, 0, sizeof(*desc));
+	/* No memset: the stage desc might still be used as a principal */
+	desc->attribute &= ~GUC_STAGE_DESC_ATTR_TYPE_MASK;
+	desc->db_id = 0;
+	desc->db_trigger_phy = 0;
+	desc->db_trigger_cpu = 0;
+	desc->db_trigger_uk = 0;
+	desc->process_desc = 0;
+	desc->wq_addr = 0;
+	desc->wq_size = 0;
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -1292,6 +1265,97 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
 }
 
+/*
+ * Mark the LRC slot (ce->sw_counter) of @ctx on @engine as used in its stage
+ * descriptor, initialising the descriptor when it is the first LRC attached.
+ */
+static void guc_ppal_stage_attach(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &ctx->i915->guc;
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct guc_stage_desc *desc;
+
+	GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+	GEM_BUG_ON(ce->sw_counter >= GUC_MAX_LRC_PER_CLASS);
+
+	desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+	if (desc->lrc_count == 0) {
+		desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+				  GUC_STAGE_DESC_ATTR_PRINCIPAL |
+				  GUC_STAGE_DESC_ATTR_KERNEL;
+		desc->stage_id = ce->sw_context_id;
+		/*
+		 * HACK: this breaks the proxy/ppal abstraction, but the GuC
+		 * crashes without it starting with v23.122. A fix is promised;
+		 * until then we need an up-to-date firmware for the PO.
+		 */
+		desc->process_desc =
+			__get_stage_desc(guc->execbuf_client)->process_desc;
+	}
+
+	GEM_BUG_ON(bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+				     ce->sw_counter));
+	bitmap32_set_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+	desc->lrc_count++;
+	/* GuC optimizations: high-water mark of attached SW counters */
+	if (ce->sw_counter >= desc->max_lrc_per_class)
+		desc->max_lrc_per_class = ce->sw_counter + 1;
+}
+
+/* Release the LRC slot of @ctx on @engine; reset the stage desc if unused */
+static void guc_ppal_stage_detach(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &ctx->i915->guc;
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct guc_stage_desc *desc;
+	struct guc_execlist_context *lrc;
+
+	GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+	desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+	GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+				      ce->sw_counter));
+	bitmap32_clear_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+	desc->lrc_count--;
+
+	if (desc->lrc_count == 0) {
+		desc->attribute = 0;
+		desc->stage_id = 0;
+		desc->max_lrc_per_class = 0;
+	} else {
+		/* TODO: GuC optimizations */
+	}
+
+	lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+	memset(lrc, 0, sizeof(*lrc));
+}
+
+/* Fill in the LRC entry of @ctx on @engine in its stage descriptor */
+static void guc_ppal_stage_update(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &ctx->i915->guc;
+	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct guc_stage_desc *desc;
+	struct guc_execlist_context *lrc;
+
+	GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+	desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+	/* The slot must have been attached before it can be updated */
+	GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+				      ce->sw_counter));
+	lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+
+	lrc->context_desc = lower_32_bits(ce->lrc_desc);
+	lrc->context_id = upper_32_bits(ce->lrc_desc);
+	/* The state page is after PPHWSP */
+	lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + LRC_STATE_PN * PAGE_SIZE;
+	lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
+	lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
+}
+
 int intel_guc_submission_enable(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -1314,17 +1378,26 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 
 	GEM_BUG_ON(!guc->execbuf_client);
 
+	guc->ctx_alloc_hook = guc_ppal_stage_attach;
+	guc->ctx_free_hook = guc_ppal_stage_detach;
+	guc->ctx_update_hook = guc_ppal_stage_update;
+
+	for_each_engine(engine, dev_priv, id) {
+		guc_ppal_stage_attach(dev_priv->kernel_context, engine);
+		guc_ppal_stage_update(dev_priv->kernel_context, engine);
+	}
+
 	guc_reset_wq(guc->execbuf_client);
 	if (guc->preempt_client)
 		guc_reset_wq(guc->preempt_client);
 
 	err = intel_guc_sample_forcewake(guc);
 	if (err)
-		return err;
+		goto err_clear_ctx_hooks;
 
 	err = guc_clients_doorbell_init(guc);
 	if (err)
-		return err;
+		goto err_clear_ctx_hooks;
 
 	/* Take over from manual control of ELSP (execlists) */
 	guc_interrupts_capture(dev_priv);
@@ -1335,6 +1408,12 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	}
 
 	return 0;
+
+err_clear_ctx_hooks:
+	guc->ctx_alloc_hook = NULL;
+	guc->ctx_free_hook = NULL;
+	guc->ctx_update_hook = NULL;
+	return err;
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1345,6 +1424,10 @@ void intel_guc_submission_disable(struct intel_guc *guc)
 
 	guc_interrupts_release(dev_priv);
 	guc_clients_doorbell_fini(guc);
+
+	guc->ctx_alloc_hook = NULL;
+	guc->ctx_free_hook = NULL;
+	guc->ctx_update_hook = NULL;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
1.9.1



More information about the Intel-gfx-trybot mailing list