[Intel-gfx] [PATCH 2/2] drm/i915: Engine relative MMIO for Gen12
John.C.Harrison at Intel.com
John.C.Harrison at Intel.com
Thu Aug 22 18:02:05 UTC 2019
From: John Harrison <John.C.Harrison at Intel.com>
Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. Even then, the remap is only valid if the register in
question falls within one of the ranges specified by a table.
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
CC: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 185 ++++++++++++++++++-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 +
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 9 +-
drivers/gpu/drm/i915/i915_perf.c | 3 +-
4 files changed, 195 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index efe1c377d797..a65e8ccd9d8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -236,6 +236,127 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+/*
+ * Record the MMIO base used for Gen12+ LRI remapping: the mmio_base of
+ * instance zero of this engine's class. Older platforms are left at zero.
+ */
+static void lri_init_remap_base(struct intel_engine_cs *engine)
+{
+	struct intel_engine_cs *first;
+
+	engine->lri_mmio_base = 0;
+
+	if (INTEL_GEN(engine->i915) >= 12) {
+		first = engine->i915->gt.engine_class[engine->class][0];
+		GEM_BUG_ON(!first);
+
+		engine->lri_mmio_base = first->mmio_base;
+	}
+}
+
+/*
+ * Append the register range [min, max] to the engine's LRI remap table.
+ * The table holds at most INTEL_MAX_LRI_RANGES entries.
+ */
+static void lri_add_range(struct intel_engine_cs *engine, u32 min, u32 max)
+{
+	struct lri_range *slot;
+
+	GEM_BUG_ON(engine->lri_num_ranges >= INTEL_MAX_LRI_RANGES);
+
+	slot = &engine->lri_ranges[engine->lri_num_ranges++];
+	slot->min = min;
+	slot->max = max;
+}
+
+/*
+ * Build the table of register ranges that are valid for Gen12 LRI remapping
+ * on this engine. The table is left empty before Gen12 and for engine classes
+ * not handled below. Each handled class adds one range relative to the
+ * engine's own mmio_base, then fixed-address TRTT and aux table ranges.
+ */
+static void lri_init_remap_ranges(struct intel_engine_cs *engine)
+{
+ bool has_aux_tables = true; /* Removed after TGL? */
+ u32 offset;
+
+ engine->lri_num_ranges = 0;
+
+ if (INTEL_GEN(engine->i915) < 12)
+ return;
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /* Hardware Front End */
+ lri_add_range(engine, 0x000 + engine->mmio_base,
+ 0x7FF + engine->mmio_base);
+
+ /* TRTT */
+ lri_add_range(engine, 0x4400, 0x441F);
+
+ /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+ if (has_aux_tables)
+ lri_add_range(engine, 0x4200, 0x420F);
+ break;
+
+ case VIDEO_DECODE_CLASS:
+ /* Offsets 0x0000-0x3FFF from this engine's own mmio_base */
+ lri_add_range(engine, 0x0000 + engine->mmio_base,
+ 0x3FFF + engine->mmio_base);
+
+ /*
+ * TRTT: odd instances sit 0x20 above the preceding even instance,
+ * and each pair of instances is 0x100 apart.
+ */
+ offset = ((engine->instance & 0x1) * 0x20) +
+ ((engine->instance >> 1) * 0x100);
+ lri_add_range(engine, 0x4420 + offset, 0x443F + offset);
+
+ /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+ if (has_aux_tables) {
+ /* One fixed range per instance; instances above 3 get none */
+ switch (engine->instance) {
+ case 0:
+ lri_add_range(engine, 0x4210, 0x421F);
+ break;
+
+ case 1:
+ lri_add_range(engine, 0x4220, 0x422F);
+ break;
+
+ case 2:
+ lri_add_range(engine, 0x4290, 0x429F);
+ break;
+
+ case 3:
+ lri_add_range(engine, 0x42A0, 0x42AF);
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ case VIDEO_ENHANCEMENT_CLASS:
+ /* Offsets 0x0000-0x3FFF from this engine's own mmio_base */
+ lri_add_range(engine, 0x0000 + engine->mmio_base,
+ 0x3FFF + engine->mmio_base);
+
+ /* TRTT: instances are 0x100 apart */
+ offset = engine->instance * 0x100;
+ lri_add_range(engine, 0x4460 + offset, 0x447F + offset);
+
+ /* Aux Tables - REMOVEDBY(GEN:HAS:1406929672) */
+ if (has_aux_tables) {
+ /* One fixed range per instance; instances above 3 get none */
+ switch (engine->instance) {
+ case 0:
+ lri_add_range(engine, 0x4230, 0x423F);
+ break;
+
+ case 1:
+ lri_add_range(engine, 0x42B0, 0x42BF);
+ break;
+
+ case 2:
+ /* NOTE(review): same range as VIDEO_DECODE_CLASS instance 2 - confirm */
+ lri_add_range(engine, 0x4290, 0x429F);
+ break;
+
+ case 3:
+ /* NOTE(review): same address as instance 1 - confirm against the spec */
+ lri_add_range(engine, 0x42B0, 0x42BF);
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
static u32 i915_get_lri_cmd_legacy(const struct intel_engine_cs *engine,
u32 word_count)
{
@@ -249,6 +370,27 @@ static u32 i915_get_lri_cmd_add_offset(const struct intel_engine_cs *engine,
MI_LRI_ADD_CS_MMIO_START_GEN11;
}
+/*
+ * Gen12 flavour of the LRI command header: MI_LOAD_REGISTER_IMM for
+ * @word_count registers with the MMIO remap flag always set.
+ */
+static u32 i915_get_lri_cmd_remap(const struct intel_engine_cs *engine,
+				  u32 word_count)
+{
+	/*
+	 * TODO: Should the flag be gated on
+	 * lri_is_reg_in_remap_table(engine, reg)? Doing so requires knowing
+	 * all register addresses in advance, which is not currently the case
+	 * as some LRI commands are built from multiple sources. And if some
+	 * registers require remap while others do not, the LRI command
+	 * would need to be split into multiple pieces.
+	 */
+	return __MI_LOAD_REGISTER_IMM(word_count) | MI_LRI_MMIO_REMAP_GEN12;
+}
+
static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
{
if (INTEL_GEN(engine->i915) < 11)
@@ -262,18 +404,53 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+/*
+ * Select the helper that builds LRI command headers for this engine:
+ * legacy when relative LRI is unsupported, add-offset for relative-LRI
+ * engines before Gen12, and remap for Gen12+ (which also needs the remap
+ * base and range table initialised).
+ */
static void lri_init(struct intel_engine_cs *engine)
{
- if (i915_engine_has_relative_lri(engine))
- engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
- else
+	if (!i915_engine_has_relative_lri(engine)) {
engine->get_lri_cmd = i915_get_lri_cmd_legacy;
+		return;
+	}
+
+	if (INTEL_GEN(engine->i915) < 12) {
+		engine->get_lri_cmd = i915_get_lri_cmd_add_offset;
+		return;
+	}
+
+	/* Gen12+ remap scheme: set up the class base and valid ranges too. */
+	engine->get_lri_cmd = i915_get_lri_cmd_remap;
+	lri_init_remap_base(engine);
+	lri_init_remap_ranges(engine);
}
+/*
+ * Report whether @reg lies inside any of the engine's LRI remap ranges.
+ * Both bounds of a range are inclusive.
+ */
+static bool lri_is_reg_in_remap_table(const struct intel_engine_cs *engine,
+				      i915_reg_t reg)
+{
+	const u32 offset = i915_mmio_reg_offset(reg);
+	const struct lri_range *range = engine->lri_ranges;
+	const struct lri_range *end = range + engine->lri_num_ranges;
+
+	for (; range < end; range++) {
+		if (offset >= range->min && offset <= range->max)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Return the register offset to emit in an LRI command for @reg on @engine.
+ *
+ * Engines without relative LRI support get the absolute offset. Relative-LRI
+ * engines before Gen12 get the offset from the engine's own mmio_base. On
+ * Gen12+, a register covered by the remap table is rebased from the engine's
+ * mmio_base onto lri_mmio_base; any other register keeps its absolute offset.
+ */
u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
{
if (!i915_engine_has_relative_lri(engine))
return i915_mmio_reg_offset(reg);
- return i915_mmio_reg_offset(reg) - engine->mmio_base;
+	if (INTEL_GEN(engine->i915) < 12)
+		return i915_mmio_reg_offset(reg) - engine->mmio_base;
+
+	/*
+	 * Warn when the register is NOT in the remap table (the unexpected
+	 * case) rather than on every register that is remapped normally.
+	 * Whether such registers are meant to reach here is an open question;
+	 * they are passed through with their absolute offset.
+	 */
+	if (WARN_ON(!lri_is_reg_in_remap_table(engine, reg)))
+		return i915_mmio_reg_offset(reg);
+
+	return i915_mmio_reg_offset(reg) - engine->mmio_base +
+	       engine->lri_mmio_base;
}
static void __sprint_engine_name(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7ca6c86a33f6..1e26f668e73b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -306,6 +306,13 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;
+#define INTEL_MAX_LRI_RANGES 3
+ struct lri_range {
+ u32 min, max;
+ } lri_ranges[INTEL_MAX_LRI_RANGES];
+ u32 lri_num_ranges;
+ u32 lri_mmio_base;
+
u32 (*get_lri_cmd)(const struct intel_engine_cs *engine,
u32 word_count);
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eaa019df0ce7..0ee62a61d7b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -130,14 +130,15 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- * not. This is required for hw engine load balancing. Hence the MI_LRI
- * instruction itself is prefixed with '__' and should only be used on
- * legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ * incompatible schemes. This is required for hw engine load balancing. Hence
+ * the MI_LRI instruction itself is prefixed with '__' and should only be
+ * used on legacy hardware code paths. Generic code must always use the MI_LRI
* and i915_get_lri_reg() helper functions instead.
*/
#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_MMIO_REMAP_GEN12 (1<<17)
#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 83abdda05ba2..f88642209283 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1694,7 +1694,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
/*
* NB: The LRI instruction is generated by the hardware.
- * Should we read it in and assert that the offset flag is set?
+ * Should we read it in and assert that the appropriate
+ * offset flag is set?
*/
CTX_REG(ce->engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
--
2.21.0.5.gaeb582a983
More information about the Intel-gfx
mailing list