[Intel-gfx] [PATCH 2/2] drm/i915: Engine relative MMIO for Gen12
John.C.Harrison at Intel.com
John.C.Harrison at Intel.com
Thu Sep 19 06:47:10 UTC 2019
From: John Harrison <John.C.Harrison at Intel.com>
Gen12 introduces a completely new and different scheme for
implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
requires using the base address of instance zero of the relevant
engine class. And then, it is only valid if the register in
question falls within a certain range as specified by a table.
v2: Add support for fused parts where instance zero of a given engine
class may be missing. Make aux_tables presence a device flag rather
than just hard coded. Pre-calculate the correct LRI base address
rather than using a per-instance base and then adding on a delta to
the correct base later. Make all the table range checking a debug only
feature - the theory is that all driver access should be within the
remap ranges. [Daniele]
v3: Re-base on Mika's changes. This removes all the abstraction layer.
Which also means there is no common code path that all LRI accesses go
via. Thus it is not possible to implement the range check. However,
as noted in v2, the range check should not be necessary anyway.
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
CC: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 27 +++++++++++++++++---
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 9 ++++---
drivers/gpu/drm/i915/gt/intel_mocs.c | 6 ++---
drivers/gpu/drm/i915/i915_perf.c | 3 ++-
drivers/gpu/drm/i915/intel_device_info.c | 14 ++++++++++
drivers/gpu/drm/i915/intel_device_info.h | 1 +
7 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 6e51269ab2ec..e485b22a383e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
return true;
}
-static void lri_init(struct intel_engine_cs *engine)
+static void lri_init(struct intel_engine_cs *engine, u32 first_instance)
{
if (i915_engine_has_relative_lri(engine)) {
- engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
- engine->lri_mmio_base = 0;
+ if (INTEL_GEN(engine->i915) < 12) {
+ engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
+ engine->lri_mmio_base = 0;//engine->mmio_base;
+ engine->lri_engine = engine;
+ } else {
+ engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12;
+
+ engine->lri_engine = engine->gt->engine_class
+ [engine->class][first_instance];
+ GEM_BUG_ON(!engine->lri_engine);
+ engine->lri_mmio_base = engine->lri_engine->mmio_base;
+
+ /* According to the bspec, accesses should be compared
+ * against the remap table and remapping only enabled
+ * if found. In practice, all driver accesses should be
+ * to remap registers. So, no need for the complication
+ * of checking against any device specific tables.
+ */
+ }
} else {
engine->lri_cmd_mode = 0;
engine->lri_mmio_base = engine->mmio_base;
+ engine->lri_engine = engine;
}
}
@@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
- lri_init(engine);
+ lri_init(engine,
+ INTEL_INFO(engine->i915)->first_instance[engine->class]);
seqlock_init(&engine->stats.lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 38f486f7f7e3..62caa74e8558 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -308,6 +308,7 @@ struct intel_engine_cs {
u32 lri_mmio_base;
u32 lri_cmd_mode;
+ struct intel_engine_cs *lri_engine;
u32 uabi_capabilities;
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index bfb51d8d7ce2..9c5be384026f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -133,15 +133,16 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- * - Newer hardware supports engine relative addressing but older hardware does
- * not. This is required for hw engine load balancing. Hence the MI_LRI
- * instruction itself is prefixed with '__' and should only be used on
- * legacy hardware code paths. Generic code must always use the MI_LRI
+ * - Newer hardware supports engine relative addressing but using multiple
+ * incompatible schemes. This is required for hw engine load balancing. Hence
+ * the MI_LRI instruction itself is prefixed with '__' and should only be
+ * used on legacy hardware code paths. Generic code must always use the MI_LRI
* and i915_get_lri_reg() helper functions instead.
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_MMIO_REMAP_GEN12 BIT(17)
#define MI_LRI_ADD_CS_MMIO_START_GEN11 BIT(19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 8e8fe3deb95c..e121fea5b33c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt)
static int emit_mocs_control_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table)
{
- enum intel_engine_id engine = rq->engine->id;
+ enum intel_engine_id engine_id = rq->engine->lri_engine->id;
unsigned int index;
u32 unused_value;
u32 *cs;
@@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq,
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
*cs++ = unused_value;
}
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8b85cdfada21..fd628ae12343 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
/*
* NB: The LRI instruction is generated by the hardware.
- * Should we read it in and assert that the offset flag is set?
+ * Should we read it in and assert that the appropriate
+ * offset flag is set?
*/
CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 728c881718a2..e168a0cb8093 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -989,6 +989,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
u32 media_fuse;
u16 vdbox_mask;
u16 vebox_mask;
+ bool found;
if (INTEL_GEN(dev_priv) < 11)
return;
@@ -999,6 +1000,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
GEN11_GT_VEBOX_DISABLE_SHIFT;
+ found = false;
for (i = 0; i < I915_MAX_VCS; i++) {
if (!HAS_ENGINE(dev_priv, _VCS(i))) {
vdbox_mask &= ~BIT(i);
@@ -1018,11 +1020,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
*/
if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
+
+ if (!found) {
+ found = true;
+ info->first_instance[VIDEO_DECODE_CLASS] = i;
+ }
}
DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
vdbox_mask, VDBOX_MASK(dev_priv));
GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv));
+ found = false;
for (i = 0; i < I915_MAX_VECS; i++) {
if (!HAS_ENGINE(dev_priv, _VECS(i))) {
vebox_mask &= ~BIT(i);
@@ -1032,6 +1040,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
if (!(BIT(i) & vebox_mask)) {
info->engine_mask &= ~BIT(_VECS(i));
DRM_DEBUG_DRIVER("vecs%u fused off\n", i);
+ continue;
+ }
+
+ if (!found) {
+ found = true;
+ info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i;
}
}
DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n",
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index d4c288860aed..b5b9c1d9162a 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -151,6 +151,7 @@ struct intel_device_info {
u8 gen;
u8 gt; /* GT number, 0 if undefined */
intel_engine_mask_t engine_mask; /* Engines supported by the HW */
+ u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */
enum intel_platform platform;
--
2.21.0.5.gaeb582a983
More information about the Intel-gfx
mailing list