[Intel-gfx] [PATCH 2/2] drm/i915: Engine relative MMIO for Gen12

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Sep 24 09:12:17 UTC 2019


On 24/09/2019 00:51, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> Gen12 introduces a completely new and different scheme for
> implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This
> requires using the base address of instance zero of the relevant
> engine class. And then, it is only valid if the register in
> question falls within a certain range as specified by a table.
> 
> v2: Add support for fused parts where instance zero of a given engine
> class may be missing. Make aux_tables presence a device flag rather
> than just hard coded. Pre-calculate the correct LRI base address
> rather than using a per-instance base and then adding on a delta to
> the correct base later. Make all the table range checking a debug only
> feature - the theory is that all driver access should be within the
> remap ranges. [Daniele]
> 
> v3: Re-base on Mika's changes. This removes all the abstraction layer.
> Which also means there is no common code path that all LRI accesses go
> via. Thus it is not possible to implement the range check. However, as
> noted in v2, the range check should not be necessary anyway.
> 
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> CC: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 27 +++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
>   drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  9 ++++---
>   drivers/gpu/drm/i915/gt/intel_mocs.c         |  6 ++---
>   drivers/gpu/drm/i915/i915_perf.c             |  3 ++-
>   drivers/gpu/drm/i915/intel_device_info.c     | 14 ++++++++++
>   drivers/gpu/drm/i915/intel_device_info.h     |  1 +
>   7 files changed, 49 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5aa58e069806..09fbbffce419 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
>   	return true;
>   }
>   
> -static void lri_init(struct intel_engine_cs *engine)
> +static void lri_init(struct intel_engine_cs *engine, u32 first_instance)
>   {
>   	if (i915_engine_has_relative_lri(engine)) {
> -		engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
> -		engine->lri_mmio_base = 0;
> +		if (INTEL_GEN(engine->i915) < 12) {
> +			engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11;
> +			engine->lri_mmio_base = 0;//engine->mmio_base;

No C++ style comments please.

> +			engine->lri_engine = engine;
> +		} else {
> +			engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12;
> +
> +			engine->lri_engine = engine->gt->engine_class
> +						[engine->class][first_instance];
> +			GEM_BUG_ON(!engine->lri_engine);
> +			engine->lri_mmio_base = engine->lri_engine->mmio_base;
> +
> +			/* According to the bspec, accesses should be compared

Preferred style:

/*
  * Blah
  */


> +			 * against the remap table and remapping only enabled
> +			 * if found. In practice, all driver accesses should be
> +			 * to remap registers. So, no need for the complication
> +			 * of checking against any device specific tables.
> +			 */
> +		}
>   	} else {
>   		engine->lri_cmd_mode = 0;
>   		engine->lri_mmio_base = engine->mmio_base;
> +		engine->lri_engine = engine;
>   	}

If it helps at all you could save one indentation level by doing:

if (!supports_lri) {

} else if (GEN == 11) {

} else {

}

>   }
>   
> @@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   	/* Nothing to do here, execute in order of dependencies */
>   	engine->schedule = NULL;
>   
> -	lri_init(engine);
> +	lri_init(engine,
> +		 INTEL_INFO(engine->i915)->first_instance[engine->class]);
>   
>   	seqlock_init(&engine->stats.lock);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 38f486f7f7e3..62caa74e8558 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -308,6 +308,7 @@ struct intel_engine_cs {
>   
>   	u32 lri_mmio_base;
>   	u32 lri_cmd_mode;
> +	struct intel_engine_cs *lri_engine;
>   
>   	u32 uabi_capabilities;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index bfb51d8d7ce2..9c5be384026f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -133,15 +133,16 @@
>    *   simply ignores the register load under certain conditions.
>    * - One can actually load arbitrary many arbitrary registers: Simply issue x
>    *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> - * - Newer hardware supports engine relative addressing but older hardware does
> - *   not. This is required for hw engine load balancing. Hence the MI_LRI
> - *   instruction itself is prefixed with '__' and should only be used on
> - *   legacy hardware code paths. Generic code must always use the MI_LRI
> + * - Newer hardware supports engine relative addressing but using multiple
> + *   incompatible schemes. This is required for hw engine load balancing. Hence
> + *   the MI_LRI instruction itself is prefixed with '__' and should only be

Is the double underscore reference out of date now?

> + *   used on legacy hardware code paths. Generic code must always use the MI_LRI
>    *   and i915_get_lri_reg() helper functions instead.
>    */
>   #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
>   /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
>   #define   MI_LRI_FORCE_POSTED		(1<<12)
> +#define   MI_LRI_MMIO_REMAP_GEN12		BIT(17)
>   #define   MI_LRI_ADD_CS_MMIO_START_GEN11	BIT(19)
>   #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
>   #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 8e8fe3deb95c..e121fea5b33c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt)
>   static int emit_mocs_control_table(struct i915_request *rq,
>   				   const struct drm_i915_mocs_table *table)
>   {
> -	enum intel_engine_id engine = rq->engine->id;
> +	enum intel_engine_id engine_id = rq->engine->lri_engine->id;

Observation: If you don't rename the local diff is smaller.

>   	unsigned int index;
>   	u32 unused_value;
>   	u32 *cs;
> @@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq,
>   	for (index = 0; index < table->size; index++) {
>   		u32 value = get_entry_control(table, index);
>   
> -		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> +		*cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
>   		*cs++ = value;
>   	}
>   
>   	/* All remaining entries are also unused */
>   	for (; index < table->n_entries; index++) {
> -		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> +		*cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index));
>   		*cs++ = unused_value;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 8b85cdfada21..fd628ae12343 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
>   
>   	/*
>   	 * NB: The LRI instruction is generated by the hardware.
> -	 * Should we read it in and assert that the offset flag is set?
> +	 * Should we read it in and assert that the appropriate
> +	 * offset flag is set?
>   	 */
>   
>   	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 85e480bdc673..dece012d3ad4 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -1030,6 +1030,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
>   	u32 media_fuse;
>   	u16 vdbox_mask;
>   	u16 vebox_mask;
> +	bool found;
>   
>   	if (INTEL_GEN(dev_priv) < 11)
>   		return;
> @@ -1040,6 +1041,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
>   	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
>   		      GEN11_GT_VEBOX_DISABLE_SHIFT;
>   
> +	found = false;
>   	for (i = 0; i < I915_MAX_VCS; i++) {
>   		if (!HAS_ENGINE(dev_priv, _VCS(i))) {
>   			vdbox_mask &= ~BIT(i);
> @@ -1059,11 +1061,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
>   		 */
>   		if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0)
>   			RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i);
> +
> +		if (!found) {
> +			found = true;
> +			info->first_instance[VIDEO_DECODE_CLASS] = i;
> +		}
>   	}
>   	DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n",
>   			 vdbox_mask, VDBOX_MASK(dev_priv));
>   	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv));
>   
> +	found = false;
>   	for (i = 0; i < I915_MAX_VECS; i++) {
>   		if (!HAS_ENGINE(dev_priv, _VECS(i))) {
>   			vebox_mask &= ~BIT(i);
> @@ -1073,6 +1081,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
>   		if (!(BIT(i) & vebox_mask)) {
>   			info->engine_mask &= ~BIT(_VECS(i));
>   			DRM_DEBUG_DRIVER("vecs%u fused off\n", i);
> +			continue;
> +		}
> +
> +		if (!found) {
> +			found = true;
> +			info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i;
>   		}
>   	}
>   	DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n",
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 0cdc2465534b..75a9950969fb 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -152,6 +152,7 @@ struct intel_device_info {
>   	u8 gen;
>   	u8 gt; /* GT number, 0 if undefined */
>   	intel_engine_mask_t engine_mask; /* Engines supported by the HW */
> +	u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */

u32 is overkill however do you even need to store this persistently? 
Looks like all that is needed at runtime is engine->lri_engine? So you 
could lookup the first instance from intel_engine_setup. Or if I missed 
something and you do need it at runtime, then should go to runtime info 
since we want to avoid dependencies on mkwrite_device_info where possible.

>   
>   	enum intel_platform platform;
>   
> 

Regards,

Tvrtko

P.S. Just a superficial scan, not a normal review.


More information about the Intel-gfx mailing list