[Intel-gfx] [PATCH] drm/i915: Engine relative MMIO

Rodrigo Vivi rodrigo.vivi at intel.com
Mon May 6 21:36:53 UTC 2019


On Tue, Apr 23, 2019 at 06:50:13PM -0700, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> With virtual engines, it is no longer possible to know which specific
> physical engine a given request will be executed on at the time that
> request is generated. This means that the request itself must be engine
> agnostic - any direct register writes must be relative to the engine
> and not absolute addresses.
> 
> The LRI command has support for engine relative addressing. However,
> the mechanism is not transparent to the driver. The scheme for Gen11
> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> absolute engine base component. The hardware then adds on the correct
> engine offset at execution time.
> 
> Due to the non-trivial and differing schemes on different hardware, it
> is not possible to simply update the code that creates the LRI
> commands to set a remap flag and let the hardware get on with it.
> Instead, this patch adds function wrappers for generating the LRI
> command itself and then for constructing the correct address to use
> with the LRI.
> 
> v2: Fix build break in GVT. Remove flags parameter [review feedback
> from Chris W].
> 
> v3: Fix build break in selftest. Rebase to newer base tree and fix
> merge conflict.
> 
> v4: More rebasing. Rmoved relative addressing support from Gen7-9 only
> code paths [review feedback from Chris W].

First of all, would you have a rebased version after gt/ ?

So, from my point of view v3 was better than this because this spread
the __MI_LOAD_REGISTER_IMM everywhere.

Maybe I just disagree with Chris and I'd prefer a single place
like v3, but anyway we could probably arrive in an intermediate
solution like: Couldn't we do in a way that we keep the MI_LRI without
'__' and use this new function only on the paths needed?

and maybe name this function gen11_get_lri_cmd? to make it clear
that gen11+ needs to use this path.

> 
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/mmio_context.c       | 16 +++-
>  drivers/gpu/drm/i915/i915_cmd_parser.c        |  4 +-
>  drivers/gpu/drm/i915/i915_gem_context.c       | 12 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  3 +-
>  drivers/gpu/drm/i915/i915_perf.c              | 19 +++--
>  drivers/gpu/drm/i915/intel_engine_cs.c        | 11 +++
>  drivers/gpu/drm/i915/intel_gpu_commands.h     |  6 +-
>  drivers/gpu/drm/i915/intel_lrc.c              | 79 ++++++++++---------
>  drivers/gpu/drm/i915/intel_lrc_reg.h          |  4 +-
>  drivers/gpu/drm/i915/intel_mocs.c             | 17 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c       | 45 +++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h       |  4 +
>  drivers/gpu/drm/i915/intel_workarounds.c      |  4 +-
>  .../drm/i915/selftests/intel_workarounds.c    |  9 ++-
>  14 files changed, 154 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index e7e14c842be4..1b4d78e55ed6 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(count);
> +	*cs++ = i915_get_lri_cmd(req->engine, count);
>  	for (mmio = gvt->engine_mmio_list.mmio;
>  	     i915_mmio_reg_valid(mmio->reg); mmio++) {
>  		if (mmio->ring_id != ring_id ||
>  		    !mmio->in_context)
>  			continue;
>  
> -		*cs++ = i915_mmio_reg_offset(mmio->reg);
> +		*cs++ = i915_get_lri_reg(req->engine, mmio->reg);
>  		*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
>  				(mmio->mask << 16);
>  		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> @@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> +	/*
> +	 * GEN9_GFX_MOCS is not engine relative, therefore there is no
> +	 * need for relative addressing.
> +	 */
> +	*cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>  
>  	for (index = 0; index < GEN9_MOCS_SIZE; index++) {
>  		*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
> @@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> +	/*
> +	 * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> +	 * need for relative addressing.
> +	 */
> +	*cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>  
>  	for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
>  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 503d548a55f7..91ebe18aacc6 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
>  	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
>  	CMD(  MI_SEMAPHORE_MBOX,                SMI,   !F,  0xFF,   R  ),
>  	CMD(  MI_STORE_DWORD_INDEX,             SMI,   !F,  0xFF,   R  ),
> -	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
> +	CMD(  __MI_LOAD_REGISTER_IMM(1),        SMI,   !F,  0xFF,   W,
>  	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
>  	CMD(  MI_STORE_REGISTER_MEM,            SMI,    F,  3,     W | B,
>  	      .reg = { .offset = 1, .mask = 0x007FFFFC },
> @@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
>  					return false;
>  				}
>  
> -				if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
> +				if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
>  				    (offset + 2 > length ||
>  				     (cmd[offset + 1] & reg->mask) != reg->value)) {
>  					DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index dd728b26b5aa..f25dc613c266 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1039,11 +1039,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
>  		if (IS_ERR(cs))
>  			return PTR_ERR(cs);
>  
> -		*cs++ = MI_LOAD_REGISTER_IMM(2);
> +		*cs++ = i915_get_lri_cmd(engine, 2);
>  
> -		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> +		*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
>  		*cs++ = upper_32_bits(pd_daddr);
> -		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> +		*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
>  		*cs++ = lower_32_bits(pd_daddr);
>  
>  		*cs++ = MI_NOOP;
> @@ -1053,13 +1053,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
>  		if (IS_ERR(cs))
>  			return PTR_ERR(cs);
>  
> -		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> +		*cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
>  		for (i = GEN8_3LVL_PDPES; i--; ) {
>  			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>  
> -			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> +			*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
>  			*cs++ = upper_32_bits(pd_daddr);
> -			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> +			*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
>  			*cs++ = lower_32_bits(pd_daddr);
>  		}
>  		*cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d672c9edb94..983801f481ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1965,7 +1965,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(4);
> +	/* Gen7 only so no need to support relative offsets */
> +	*cs++ = __MI_LOAD_REGISTER_IMM(4);
>  	for (i = 0; i < 4; i++) {
>  		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
>  		*cs++ = 0;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 39a4804091d7..10d5ab991908 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
>   * in the case that the OA unit has been disabled.
>   */
>  static void
> -gen8_update_reg_state_unlocked(struct intel_context *ce,
> +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
> +			       struct intel_context *ce,
>  			       u32 *reg_state,
>  			       const struct i915_oa_config *oa_config)
>  {
> @@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
>  	};
>  	int i;
>  
> -	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> +	/*
> +	 * NB: The LRI instruction is generated by the hardware.
> +	 * Should we read it in and assert that the offset flag is set?
> +	 */
> +
> +	CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>  		(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>  		(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>  		GEN8_OA_COUNTER_RESUME);
> @@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
>  			}
>  		}
>  
> -		CTX_REG(reg_state, state_offset, flex_regs[i], value);
> +		CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
>  	}
>  
> -	CTX_REG(reg_state,
> +	CTX_REG(engine, reg_state,
>  		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
>  		gen8_make_rpcs(i915, &ce->sseu));
>  }
> @@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>  		ce->state->obj->mm.dirty = true;
>  		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>  
> -		gen8_update_reg_state_unlocked(ce, regs, oa_config);
> +		gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
>  
>  		i915_gem_object_unpin_map(ce->state->obj);
>  	}
> @@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>  
>  	stream = engine->i915->perf.oa.exclusive_stream;
>  	if (stream)
> -		gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
> +		gen8_update_reg_state_unlocked(engine, ce, regs,
> +					       stream->oa_config);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index eea9bec04f1b..ee33ce265820 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
>  	return bases[i].base;
>  }
>  
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
> +{
> +	if (INTEL_GEN(engine->i915) < 11)
> +		return false;
> +
> +	if (engine->id == BCS0)
> +		return false;
> +
> +	return true;
> +}
> +
>  static void __sprint_engine_name(char *name, const struct engine_info *info)
>  {
>  	WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
> diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
> index a34ece53a771..e7784b3fb759 100644
> --- a/drivers/gpu/drm/i915/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
> @@ -123,9 +123,13 @@
>   *   simply ignores the register load under certain conditions.
>   * - One can actually load arbitrary many arbitrary registers: Simply issue x
>   *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> + * - Newer hardware supports engine relative addresses but older hardware does
> + *   not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
> + *   and i915_get_lri_reg() helper functions.
>   */
> -#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
> +#define __MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
>  #define   MI_LRI_FORCE_POSTED		(1<<12)
> +#define   MI_LRI_ADD_CS_MMIO_START_GEN11	(1<<19)
>  #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
>  #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
>  #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4e0a351bfbca..41cbbcd9f0dd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1383,14 +1383,15 @@ static int emit_pdps(struct i915_request *rq)
>  		return PTR_ERR(cs);
>  
>  	/* Ensure the LRI have landed before we invalidate & continue */
> -	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
> +	*cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
> +				 MI_LRI_FORCE_POSTED;
>  	for (i = GEN8_3LVL_PDPES; i--; ) {
>  		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>  		u32 base = engine->mmio_base;
>  
> -		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> +		*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
>  		*cs++ = upper_32_bits(pd_daddr);
> -		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> +		*cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
>  		*cs++ = lower_32_bits(pd_daddr);
>  	}
>  	*cs++ = MI_NOOP;
> @@ -1464,7 +1465,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>  	*batch++ = i915_scratch_offset(engine->i915) + 256;
>  	*batch++ = 0;
>  
> -	*batch++ = MI_LOAD_REGISTER_IMM(1);
> +	/* Gen8/9 only so no need to support relative offsets */
> +	*batch++ = __MI_LOAD_REGISTER_IMM(1);
>  	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
>  	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
>  
> @@ -1535,13 +1537,14 @@ struct lri {
>  	u32 value;
>  };
>  
> -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
> +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
> +		     const struct lri *lri, unsigned int count)
>  {
>  	GEM_BUG_ON(!count || count > 63);
>  
> -	*batch++ = MI_LOAD_REGISTER_IMM(count);
> +	*batch++ = i915_get_lri_cmd(engine, count);
>  	do {
> -		*batch++ = i915_mmio_reg_offset(lri->reg);
> +		*batch++ = i915_get_lri_reg(engine, lri->reg);
>  		*batch++ = lri->value;
>  	} while (lri++, --count);
>  	*batch++ = MI_NOOP;
> @@ -1579,7 +1582,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
>  	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
>  	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>  
> -	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
> +	batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
>  
>  	/* WaMediaPoolStateCmdInWABB:bxt,glk */
>  	if (HAS_POOLED_EU(engine->i915)) {
> @@ -2728,10 +2731,10 @@ static void execlists_init_reg_state(u32 *regs,
>  	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
>  	 * we are not initializing here).
>  	 */
> -	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> -				 MI_LRI_FORCE_POSTED;
> +	regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
> +						  MI_LRI_FORCE_POSTED;
>  
> -	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> +	CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>  		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
>  		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
>  	if (INTEL_GEN(engine->i915) < 11) {
> @@ -2739,22 +2742,23 @@ static void execlists_init_reg_state(u32 *regs,
>  			_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
>  					    CTX_CTRL_RS_CTX_ENABLE);
>  	}
> -	CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> -	CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> -	CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> -	CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> +	CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> +	CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> +	CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> +	CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
>  		RING_CTL_SIZE(ring->size) | RING_VALID);
> -	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> -	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> +	CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> +	CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> +	CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> +	CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> +	CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> +	CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
>  	if (rcs) {
>  		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>  
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> +		CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
> +			RING_INDIRECT_CTX(base), 0);
> +		CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
>  			RING_INDIRECT_CTX_OFFSET(base), 0);
>  		if (wa_ctx->indirect_ctx.size) {
>  			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> @@ -2767,7 +2771,8 @@ static void execlists_init_reg_state(u32 *regs,
>  				intel_lr_indirect_ctx_offset(engine) << 6;
>  		}
>  
> -		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> +		CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
> +			RING_BB_PER_CTX_PTR(base), 0);
>  		if (wa_ctx->per_ctx.size) {
>  			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>  
> @@ -2776,18 +2781,19 @@ static void execlists_init_reg_state(u32 *regs,
>  		}
>  	}
>  
> -	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +	regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
> +						  MI_LRI_FORCE_POSTED;
>  
> -	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +	CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
>  	/* PDP values well be assigned later if needed */
> -	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> -	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> -	CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> -	CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> -	CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> -	CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> -	CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> -	CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
> + 	CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> +	CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> +	CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> +	CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> +	CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> +	CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> +	CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> +	CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>  
>  	if (i915_vm_is_4lvl(&ppgtt->vm)) {
>  		/* 64b PPGTT (48bit canonical)
> @@ -2803,8 +2809,9 @@ static void execlists_init_reg_state(u32 *regs,
>  	}
>  
>  	if (rcs) {
> -		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> -		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> +		regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
> +		CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
> +			GEN8_R_PWR_CLK_STATE, 0);
>  
>  		i915_oa_init_reg_state(engine, ce, regs);
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
> index 5ef932d810a7..40b1142d0d74 100644
> --- a/drivers/gpu/drm/i915/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
> @@ -39,10 +39,10 @@
>  #define CTX_R_PWR_CLK_STATE		0x42
>  #define CTX_END				0x44
>  
> -#define CTX_REG(reg_state, pos, reg, val) do { \
> +#define CTX_REG(engine, reg_state, pos, reg, val) do { \
>  	u32 *reg_state__ = (reg_state); \
>  	const u32 pos__ = (pos); \
> -	(reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
> +	(reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
>  	(reg_state__)[(pos__) + 1] = (val); \
>  } while (0)
>  
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 274ba78500c0..bb11d0f68bba 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
>  /**
>   * intel_mocs_init_engine() - emit the mocs control table
>   * @engine:	The engine for whom to emit the registers.
> - *
> - * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> - * given table starting at the given address.
>   */
>  void intel_mocs_init_engine(struct intel_engine_cs *engine)
>  {
> @@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
> +	*cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
>  
>  	for (index = 0; index < table->size; index++) {
>  		u32 value = get_entry_control(table, index);
>  
> -		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> +		*cs++ = i915_get_lri_reg(rq->engine,
> +					 mocs_register(engine, index));
>  		*cs++ = value;
>  	}
>  
>  	/* All remaining entries are also unused */
>  	for (; index < table->n_entries; index++) {
> -		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> +		*cs++ = i915_get_lri_reg(rq->engine,
> +					 mocs_register(engine, index));
>  		*cs++ = unused_value;
>  	}
>  
> @@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> +	/*
> +	 * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> +	 * need for relative addressing?
> +	 */
> +	*cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>  
>  	for (i = 0; i < table->size / 2; i++) {
>  		u16 low = get_entry_l3cc(table, 2 * i);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3844581f622c..107ed7c0d1fa 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1592,12 +1592,13 @@ static int load_pd_dir(struct i915_request *rq,
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(1);
> -	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
> +	/* Can these not be merged into a single LRI??? */
> +	*cs++ = i915_get_lri_cmd(engine, 1);
> +	*cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
>  	*cs++ = PP_DIR_DCLV_2G;
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(1);
> -	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
> +	*cs++ = i915_get_lri_cmd(engine, 1);
> +	*cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
>  	*cs++ = ppgtt->pd.base.ggtt_offset << 10;
>  
>  	intel_ring_advance(rq, cs);
> @@ -1662,7 +1663,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  		if (num_engines) {
>  			struct intel_engine_cs *signaller;
>  
> -			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> +			/*
> +			 * Must use absolute engine address as the register
> +			 * write is targeting a different engine.
> +			 */
> +			*cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
>  			for_each_engine(signaller, i915, id) {
>  				if (signaller == engine)
>  					continue;
> @@ -1708,7 +1713,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  			struct intel_engine_cs *signaller;
>  			i915_reg_t last_reg = {}; /* keep gcc quiet */
>  
> -			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> +			/*
> +			 * Must use absolute engine address as the register
> +			 * write is targeting a different engine.
> +			 */
> +			*cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
>  			for_each_engine(signaller, i915, id) {
>  				if (signaller == engine)
>  					continue;
> @@ -1750,9 +1759,9 @@ static int remap_l3(struct i915_request *rq, int slice)
>  	 * here because no other code should access these registers other than
>  	 * at initialization time.
>  	 */
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
> +	*cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
>  	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> -		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
> +		*cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
>  		*cs++ = remap_info[i];
>  	}
>  	*cs++ = MI_NOOP;
> @@ -2337,3 +2346,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>  
>  	return intel_init_ring_buffer(engine);
>  }
> +
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
> +{
> +	u32 word;
> +
> +	word = __MI_LOAD_REGISTER_IMM(word_count);
> +
> +	if (i915_engine_has_relative_lri(engine))
> +		word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
> +
> +	return word;
> +}
> +
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
> +{
> +	if (!i915_engine_has_relative_lri(engine))
> +		return i915_mmio_reg_offset(reg);
> +
> +	return i915_mmio_reg_offset(reg) - engine->mmio_base;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 72c7c337ace9..261b3c433069 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -580,4 +580,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
>  	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
>  }
>  
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
> +
>  #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index b3cbed1ee1c9..a50c47993c88 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -629,9 +629,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> -	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
> +	*cs++ = i915_get_lri_cmd(rq->engine, wal->count);
>  	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> -		*cs++ = i915_mmio_reg_offset(wa->reg);
> +		*cs++ = i915_get_lri_reg(rq->engine, wa->reg);
>  		*cs++ = wa->val;
>  	}
>  	*cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index a363748a7a4f..dbe3cd4d4981 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -444,6 +444,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>  
>  	for (i = 0; i < engine->whitelist.count; i++) {
>  		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
> +		u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
>  		u64 addr = scratch->node.start;
>  		struct i915_request *rq;
>  		u32 srm, lrm, rsvd;
> @@ -476,8 +477,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>  		idx = 1;
>  		for (v = 0; v < ARRAY_SIZE(values); v++) {
>  			/* LRI garbage */
> -			*cs++ = MI_LOAD_REGISTER_IMM(1);
> -			*cs++ = reg;
> +			*cs++ = i915_get_lri_cmd(engine, 1);
> +			*cs++ = regLRI;
>  			*cs++ = values[v];
>  
>  			/* SRM result */
> @@ -489,8 +490,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>  		}
>  		for (v = 0; v < ARRAY_SIZE(values); v++) {
>  			/* LRI garbage */
> -			*cs++ = MI_LOAD_REGISTER_IMM(1);
> -			*cs++ = reg;
> +			*cs++ = i915_get_lri_cmd(engine, 1);
> +			*cs++ = regLRI;
>  			*cs++ = ~values[v];
>  
>  			/* SRM result */
> -- 
> 2.21.0.5.gaeb582a983
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list