[Intel-gfx] [PATCH 2/2] drm/i915: Engine relative MMIO for Gen12
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Sep 24 09:12:17 UTC 2019
On 24/09/2019 00:51, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> Gen12 introduces a completely new and different scheme for
> implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
> requires using the base address of instance zero of the relevant
> engine class. And then, it is only valid if the register in
> question falls within a certain range as specified by a table.
>
> v2: Add support for fused parts where instance zero of a given engine
> class may be missing. Make aux_tables presence a device flag rather
> than just hard coded. Pre-calculate the correct LRI base address
> rather than using a per-instance base and then adding on a delta to
> the correct base later. Make all the table range checking a debug only
> feature - the theory is that all driver access should be within the
> remap ranges. [Daniele]
>
> v3: Re-base on Mika's changes. This removes all the abstraction layer.
> Which also means there is no common code path that all LRI accesses go
> via. Thus it is not possible to implement the range check. However, as
> noted in v2, the range check should not be necessary anyway.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> CC: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 27 +++++++++++++++++---
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 9 ++++---
> drivers/gpu/drm/i915/gt/intel_mocs.c | 6 ++---
> drivers/gpu/drm/i915/i915_perf.c | 3 ++-
> drivers/gpu/drm/i915/intel_device_info.c | 14 ++++++++++
> drivers/gpu/drm/i915/intel_device_info.h | 1 +
> 7 files changed, 49 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5aa58e069806..09fbbffce419 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
> return true;
> }
>
> -static void lri_init(struct intel_engine_cs *engine)
> +static void lri_init(struct intel_engine_cs *engine, u32 first_instance)
> {
> if (i915_engine_has_relative_lri(engine)) {
> - engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
> - engine->lri_mmio_base = 0;
> + if (INTEL_GEN(engine->i915) < 12) {
> + engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
> + engine->lri_mmio_base = 0;//engine->mmio_base;
No C++ style comments please.
> + engine->lri_engine = engine;
> + } else {
> + engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12;
> +
> + engine->lri_engine = engine->gt->engine_class
> + [engine->class][first_instance];
> + GEM_BUG_ON(!engine->lri_engine);
> + engine->lri_mmio_base = engine->lri_engine->mmio_base;
> +
> + /* According to the bspec, accesses should be compared
Preferred style:
/*
* Blah
*/
> + * against the remap table and remapping only enabled
> + * if found. In practice, all driver accesses should be
> + * to remap registers. So, no need for the complication
> + * of checking against any device specific tables.
> + */
> + }
> } else {
> engine->lri_cmd_mode = 0;
> engine->lri_mmio_base = engine->mmio_base;
> + engine->lri_engine = engine;
> }
If it helps at all you could save one indentation level by doing:
if (!supports_lri) {
} else if (GEN == 11) {
} else {
}
> }
>
> @@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> /* Nothing to do here, execute in order of dependencies */
> engine->schedule = NULL;
>
> - lri_init(engine);
> + lri_init(engine,
> + INTEL_INFO(engine->i915)->first_instance[engine->class]);
>
> seqlock_init(&engine->stats.lock);
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 38f486f7f7e3..62caa74e8558 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -308,6 +308,7 @@ struct intel_engine_cs {
>
> u32 lri_mmio_base;
> u32 lri_cmd_mode;
> + struct intel_engine_cs *lri_engine;
>
> u32 uabi_capabilities;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index bfb51d8d7ce2..9c5be384026f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -133,15 +133,16 @@
> * simply ignores the register load under certain conditions.
> * - One can actually load arbitrary many arbitrary registers: Simply issue x
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> - * - Newer hardware supports engine relative addressing but older hardware does
> - * not. This is required for hw engine load balancing. Hence the MI_LRI
> - * instruction itself is prefixed with '__' and should only be used on
> - * legacy hardware code paths. Generic code must always use the MI_LRI
> + * - Newer hardware supports engine relative addressing but using multiple
> + * incompatible schemes. This is required for hw engine load balancing. Hence
> + * the MI_LRI instruction itself is prefixed with '__' and should only be
Is the double underscore reference out of date now?
> + * used on legacy hardware code paths. Generic code must always use the MI_LRI
> * and i915_get_lri_reg() helper functions instead.
> */
> #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
> #define MI_LRI_FORCE_POSTED (1<<12)
> +#define MI_LRI_MMIO_REMAP_GEN12 BIT(17)
> #define MI_LRI_ADD_CS_MMIO_START_GEN11 BIT(19)
> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 8e8fe3deb95c..e121fea5b33c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt)
> static int emit_mocs_control_table(struct i915_request *rq,
> const struct drm_i915_mocs_table *table)
> {
> - enum intel_engine_id engine = rq->engine->id;
> + enum intel_engine_id engine_id = rq->engine->lri_engine->id;
Observation: If you don't rename the local diff is smaller.
> unsigned int index;
> u32 unused_value;
> u32 *cs;
> @@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq,
> for (index = 0; index < table->size; index++) {
> u32 value = get_entry_control(table, index);
>
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
> *cs++ = value;
> }
>
> /* All remaining entries are also unused */
> for (; index < table->n_entries; index++) {
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
> *cs++ = unused_value;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 8b85cdfada21..fd628ae12343 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
>
> /*
> * NB: The LRI instruction is generated by the hardware.
> - * Should we read it in and assert that the offset flag is set?
> + * Should we read it in and assert that the appropriate
> + * offset flag is set?
> */
>
> CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 85e480bdc673..dece012d3ad4 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -1030,6 +1030,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
> u32 media_fuse;
> u16 vdbox_mask;
> u16 vebox_mask;
> + bool found;
>
> if (INTEL_GEN(dev_priv) < 11)
> return;
> @@ -1040,6 +1041,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
> vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
> GEN11_GT_VEBOX_DISABLE_SHIFT;
>
> + found = false;
> for (i = 0; i < I915_MAX_VCS; i++) {
> if (!HAS_ENGINE(dev_priv, _VCS(i))) {
> vdbox_mask &= ~BIT(i);
> @@ -1059,11 +1061,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
> */
> if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
> RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
> +
> + if (!found) {
> + found = true;
> + info->first_instance[VIDEO_DECODE_CLASS] = i;
> + }
> }
> DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
> vdbox_mask, VDBOX_MASK(dev_priv));
> GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv));
>
> + found = false;
> for (i = 0; i < I915_MAX_VECS; i++) {
> if (!HAS_ENGINE(dev_priv, _VECS(i))) {
> vebox_mask &= ~BIT(i);
> @@ -1073,6 +1081,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
> if (!(BIT(i) & vebox_mask)) {
> info->engine_mask &= ~BIT(_VECS(i));
> DRM_DEBUG_DRIVER("vecs%u fused off\n", i);
> + continue;
> + }
> +
> + if (!found) {
> + found = true;
> + info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i;
> }
> }
> DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n",
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 0cdc2465534b..75a9950969fb 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -152,6 +152,7 @@ struct intel_device_info {
> u8 gen;
> u8 gt; /* GT number, 0 if undefined */
> intel_engine_mask_t engine_mask; /* Engines supported by the HW */
> + u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */
u32 is overkill however do you even need to store this persistently?
Looks like all that is needed at runtime is engine->lri_engine? So you
could lookup the first instance from intel_engine_setup. Or if I missed
something and you do need it at runtime, then should go to runtime info
since we want to avoid dependencies on mkwrite_device_info where possible.
>
> enum intel_platform platform;
>
>
Regards,
Tvrtko
P.S. Just a superficial scan, not a normal review.
More information about the Intel-gfx
mailing list