[Intel-gfx] [PATCH v6 08/14] drm/i915: Functions to support command submission via GuC

Wed Apr 29 15:13:29 PDT 2015

From: Alex Dai <yu.dai at intel.com>

To enable GuC command submission / scheduling, we need to set up
firmware initialization properly. i915.enable_guc_scheduling is
introduced to enable / disable GuC submission.

The GuC firmware uses the one page after the Ring Context as shared data.
However, the GuC uses the same offset to address this page for all rings,
so we have to allocate the same LRC context size for all rings.

Also, reduce the ring buffer size to 4 pages. In the GuC, the work queue
tail is referenced by 11 bits (WQ_RING_TAIL_MASK). The tail is counted in
QWs (8 bytes), so it covers 14 bits of byte offset, i.e. 16KB (4 pages).

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai at intel.com>
---
 drivers/gpu/drm/i915/Makefile              |  3 +-
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_params.c         |  4 ++
 drivers/gpu/drm/i915/intel_guc.h           |  8 +++
 drivers/gpu/drm/i915/intel_guc_loader.c    | 52 ++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_scheduler.c | 78 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 23 +++------
 7 files changed, 153 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_guc_scheduler.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6188302..50b2057 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -37,7 +37,8 @@ i915-y += i915_cmd_parser.o \
 	  i915_trace_points.o \
 	  intel_lrc.o \
 	  intel_ringbuffer.o \
-	  intel_uncore.o
+	  intel_uncore.o \
+	  intel_guc_scheduler.o
 
 # ancilliary microcontroller support
 i915-y += intel_uc_loader.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0b901d..2411118 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2499,6 +2499,7 @@ struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool enable_guc_scheduling;
 	int use_mmio_flip;
 	int mmio_debug;
 	bool verbose_state_checks;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index bb64415..9ad2e27 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -53,6 +53,7 @@ struct i915_params i915 __read_mostly = {
 	.mmio_debug = 0,
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
+	.enable_guc_scheduling = false,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -184,3 +185,6 @@ MODULE_PARM_DESC(verbose_state_checks,
 module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600);
 MODULE_PARM_DESC(nuclear_pageflip,
 		 "Force atomic modeset functionality; only planes work for now (default: false).");
+
+module_param_named(enable_guc_scheduling, i915.enable_guc_scheduling, bool, 0400);
+MODULE_PARM_DESC(enable_guc_scheduling, "Enable GuC scheduling (default:false)");
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 3082a3e..bf80f32 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -34,6 +34,8 @@ struct intel_guc {
 	/* GuC-specific additions */
 	uint32_t fw_ver_major;
 	uint32_t fw_ver_minor;
+
+	struct drm_i915_gem_object *ctx_pool_obj;
 };
 
 #define GUC_STATUS		0xc000
@@ -106,4 +108,10 @@ struct drm_i915_gem_object *
 intel_guc_allocate_gem_obj(struct drm_device *dev, u32 size);
 void intel_guc_release_gem_obj(struct drm_i915_gem_object *obj);
 
+/* intel_guc_scheduler.c */
+int guc_scheduler_init(struct drm_device *dev);
+void guc_scheduler_fini(struct drm_device *dev);
+int guc_scheduler_enable(struct drm_device *dev);
+void guc_scheduler_disable(struct drm_device *dev);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 49f3730..5e021db 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -229,6 +229,36 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
 
 	/* XXX: Set up log buffer */
 
+	/* If GuC scheduling is enabled, setup params here. */
+	if (i915.enable_guc_scheduling) {
+		u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
+		u32 ctx_in_16 = MAX_GUC_GPU_CONTEXTS / 16;
+
+		pgs >>= PAGE_SHIFT;
+		params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
+			(ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT);
+
+		/* The shared data is the one page following the Ring Context,
+		 * so its offset is the size of the LRC in pages. */
+		pgs = IS_GEN9(dev_priv->dev) ? GEN9_LR_CONTEXT_RENDER_SIZE :
+				GEN8_LR_CONTEXT_RENDER_SIZE;
+		pgs >>= PAGE_SHIFT;
+		params[GUC_CTL_OFFSET] |= pgs << GUC_CTL_SHARED_DATA_SHIFT;
+
+		/* This must be non-zero for the scheduler to initialize, even
+		 * though the firmware doesn't use it. Note that we use a
+		 * separate object for the actual ring buffer, while firmware
+		 * may treat this as an offset from the Ring Context base. We
+		 * must take care of this if firmware starts using this field.
+		 */
+		params[GUC_CTL_OFFSET] |= 1 << GUC_CTL_RING_BUFFER_SHIFT;
+
+		params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS;
+
+		/* Clear this bit to enable the GuC scheduler */
+		params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER;
+	}
+
 	I915_WRITE(SOFT_SCRATCH(0), 0);
 
 	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
@@ -368,14 +398,31 @@ int intel_guc_ucode_load(struct drm_device *dev, bool wait)
 	if (err)
 		goto fail;
 
+	err = guc_scheduler_init(dev);
+	if (err)
+		goto fail;
+
 	err = guc_ucode_xfer(dev);
 	if (err)
 		goto fail;
 
+	err = guc_scheduler_enable(dev);
+	if (err)
+		goto fail;
+
 	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_SUCCESS;
 	return 0;
 
 fail:
+	guc_scheduler_disable(dev);
+
+	if (i915.enable_guc_scheduling) {
+		DRM_ERROR("Failed to initialize GuC, declaring GPU wedged\n");
+		atomic_set_mask(I915_WEDGED,
+				&dev_priv->gpu_error.reset_counter);
+		i915.enable_guc_scheduling = false;
+	}
+
 	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_FAIL;
 	return err;
 }
@@ -393,6 +440,9 @@ void intel_guc_ucode_init(struct drm_device *dev)
 	struct intel_uc_fw *guc_fw = &guc->guc_fw;
 	const char *path;
 
+	if (!HAS_GUC_SCHED(dev))
+		i915.enable_guc_scheduling = false;
+
 	if (!HAS_GUC_UCODE(dev)) {
 		path = NULL;
 	} else if (IS_SKYLAKE(dev)) {
@@ -419,5 +469,7 @@ void intel_guc_ucode_fini(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
 
+	guc_scheduler_fini(dev);
+
 	intel_uc_fw_fini(guc_fw);
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c
new file mode 100644
index 0000000..1047192
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "i915_drv.h"
+#include "intel_guc.h"
+
+void guc_scheduler_fini(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc *guc = &dev_priv->guc;
+	struct drm_i915_gem_object *ctx_pool = guc->ctx_pool_obj;
+
+	guc_scheduler_disable(dev);
+
+	if (ctx_pool) {
+		intel_guc_release_gem_obj(ctx_pool);
+		guc->ctx_pool_obj = NULL;
+	}
+}
+
+/* Set up the resources needed by the firmware scheduler. Currently this only
+ * requires one object that can be mapped through the GGTT.
+ */
+int guc_scheduler_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	const size_t ctxsize = sizeof(struct guc_context_desc);
+	const size_t poolsize = MAX_GUC_GPU_CONTEXTS * ctxsize;
+	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
+	struct intel_guc *guc = &dev_priv->guc;
+
+	if (!i915.enable_guc_scheduling)
+		return 0; /* not enabled  */
+
+	if (guc->ctx_pool_obj)
+		return 0; /* already allocated */
+
+	guc->ctx_pool_obj = intel_guc_allocate_gem_obj(dev_priv->dev, gemsize);
+	if (!guc->ctx_pool_obj)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int guc_scheduler_enable(struct drm_device *dev)
+{
+	if (!i915.enable_guc_scheduling)
+		return 0;
+
+	/* TODO: placeholder for guc scheduler enabling */
+	return 0;
+}
+
+void guc_scheduler_disable(struct drm_device *dev)
+{
+	/* TODO: placeholder for guc scheduler disabling */
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index da58da2..8685205 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1755,20 +1755,10 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
 
 	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
 
-	switch (ring->id) {
-	case RCS:
-		if (INTEL_INFO(ring->dev)->gen >= 9)
-			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
-		else
-			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
-		break;
-	case VCS:
-	case BCS:
-	case VECS:
-	case VCS2:
-		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
-		break;
-	}
+	if (INTEL_INFO(ring->dev)->gen >= 9)
+		ret = GEN9_LR_CONTEXT_RENDER_SIZE;
+	else
+		ret = GEN8_LR_CONTEXT_RENDER_SIZE;
 
 	return ret;
 }
@@ -1817,6 +1807,9 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	WARN_ON(ctx->engine[ring->id].state);
 
 	context_size = round_up(get_lr_context_size(ring), 4096);
+	/* One extra page for the data shared between the driver and GuC */
+	if (i915.enable_guc_scheduling)
+		context_size += PAGE_SIZE;
 
 	ctx_obj = i915_gem_alloc_object(dev, context_size);
 	if (IS_ERR(ctx_obj)) {
@@ -1845,7 +1838,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 	ringbuf->ring = ring;
 
-	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->size = 4 * PAGE_SIZE;
 	ringbuf->effective_size = ringbuf->size;
 	ringbuf->head = 0;
 	ringbuf->tail = 0;
-- 
1.9.1