[Intel-gfx] [PATCH] drm/i915/tgl: Register state context definition for Gen12

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Thu Sep 5 16:13:37 UTC 2019



On 9/5/2019 4:34 AM, Mika Kuoppala wrote:
> From: Michel Thierry <michel.thierry at intel.com>
>
> Gen12 has subtle changes in the reg state context offsets (some fields
> are gone, some are in a different location), compared to previous Gens.
>
> The simplest approach seems to be keeping Gen12 (and future platform)
> changes apart from the previous gens, while keeping the registers that
> are contiguous in functions we can reuse.
>
> v2: alias, virtual engine, rpcs, prune unused regs (Mika)
>
> Bspec: 46255
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c     | 194 +++++++++++++++++-------
>   drivers/gpu/drm/i915/gt/intel_lrc.h     |   2 +
>   drivers/gpu/drm/i915/gt/intel_lrc_reg.h |   7 +-
>   3 files changed, 146 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 87b7473a6dfb..f44c78a82915 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -810,6 +810,7 @@ static void virtual_update_register_offsets(u32 *regs,
>   
>   	/* Must match execlists_init_reg_state()! */
>   
> +	/* Common part */
>   	regs[CTX_CONTEXT_CONTROL] =
>   		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
>   	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> @@ -820,13 +821,16 @@ static void virtual_update_register_offsets(u32 *regs,
>   	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
>   	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
>   	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +
>   	regs[CTX_SECOND_BB_HEAD_U] =
>   		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
>   	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
>   	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
>   

We should do those 3 only on pre-gen12, or at least add a comment saying 
that those are not applicable to gen12+ and we rely to follow-up writes 
to overwrite them.

> +	/* PPGTT part */
>   	regs[CTX_CTX_TIMESTAMP] =
>   		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +
>   	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
>   	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
>   	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> @@ -844,8 +848,12 @@ static void virtual_update_register_offsets(u32 *regs,
>   		regs[CTX_BB_PER_CTX_PTR] =
>   			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));

CTX_BB_PER_CTX_PTR and the 2 indirect ctx entries are different on gen12 
as well

>   
> -		regs[CTX_R_PWR_CLK_STATE] =
> -			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +		if (INTEL_GEN(engine->i915) < 11)

This should be < 12

> +			regs[CTX_R_PWR_CLK_STATE] =
> +				i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +		else
> +			regs[GEN12_CTX_R_PWR_CLK_STATE] =
> +				i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
>   	}
>   }
>   
> @@ -1747,8 +1755,12 @@ __execlists_update_reg_state(struct intel_context *ce,
>   
>   	/* RPCS */
>   	if (engine->class == RENDER_CLASS) {
> -		regs[CTX_R_PWR_CLK_STATE + 1] =
> -			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> +		const u32 rpcs = intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> +		const u32 offset = INTEL_GEN(engine->i915) < 11 ?

< 12

> +			CTX_R_PWR_CLK_STATE + 1 :
> +			GEN12_CTX_R_PWR_CLK_STATE + 1;
> +
> +		regs[offset] = rpcs;
>   
>   		i915_oa_init_reg_state(engine, ce, regs);
>   	}
> @@ -3116,35 +3128,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>   	return indirect_ctx_offset;
>   }
>   
> -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> -{
> -	if (i915_is_ggtt(vm))
> -		return i915_vm_to_ggtt(vm)->alias;
> -	else
> -		return i915_vm_to_ppgtt(vm);
> -}
>   
> -static void execlists_init_reg_state(u32 *regs,
> -				     struct intel_context *ce,
> -				     struct intel_engine_cs *engine,
> -				     struct intel_ring *ring)
> +static void init_common_reg_state(u32 * const regs,
> +				  struct i915_ppgtt * const ppgtt,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_ring *ring)
>   {
> -	struct i915_ppgtt *ppgtt = vm_alias(ce->vm);
> -	bool rcs = engine->class == RENDER_CLASS;
> -	u32 base = engine->mmio_base;
> -
> -	/*
> -	 * A context is actually a big batch buffer with several
> -	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> -	 * values we are setting here are only for the first context restore:
> -	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> -	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> -	 * we are not initializing here).
> -	 *
> -	 * Must keep consistent with virtual_update_register_offsets().
> -	 */
> -	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> -				 MI_LRI_FORCE_POSTED;
> +	const u32 base = engine->mmio_base;
>   
>   	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>   		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> @@ -3162,38 +3152,44 @@ static void execlists_init_reg_state(u32 *regs,
>   	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>   	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>   	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> -	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> -	if (rcs) {
> -		struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> -		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> -			RING_INDIRECT_CTX_OFFSET(base), 0);
> -		if (wa_ctx->indirect_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +}
>   
> -			regs[CTX_RCS_INDIRECT_CTX + 1] =
> -				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> -				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
> +static void init_wa_bb_reg_state(u32 * const regs,
> +				 struct intel_engine_cs *engine,
> +				 u32 pos_bb_per_ctx)
> +{
> +	struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
> +	const u32 base = engine->mmio_base;
> +	const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
> +	const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
>   
> -			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
> -				intel_lr_indirect_ctx_offset(engine) << 6;
> -		}
> +	GEM_BUG_ON(engine->class != RENDER_CLASS);
> +	CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
> +	CTX_REG(regs, pos_indirect_ctx_offset,
> +		RING_INDIRECT_CTX_OFFSET(base), 0);
> +	if (wa_ctx->indirect_ctx.size) {
> +		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> -		if (wa_ctx->per_ctx.size) {
> -			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +		regs[pos_indirect_ctx + 1] =
> +			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
> +			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
>   
> -			regs[CTX_BB_PER_CTX_PTR + 1] =
> -				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> -		}
> +		regs[pos_indirect_ctx_offset + 1] =
> +			intel_lr_indirect_ctx_offset(engine) << 6;
>   	}
>   
> -	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +	CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
> +	if (wa_ctx->per_ctx.size) {
> +		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>   
> -	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +		regs[pos_bb_per_ctx + 1] =
> +			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> +	}
> +}
> +
> +static void init_ppgtt_reg_state(u32 *regs, u32 base,
> +				 struct i915_ppgtt *ppgtt)
> +{
>   	/* PDP values well be assigned later if needed */
>   	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>   	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> @@ -3216,6 +3212,40 @@ static void execlists_init_reg_state(u32 *regs,
>   		ASSIGN_CTX_PDP(ppgtt, regs, 1);
>   		ASSIGN_CTX_PDP(ppgtt, regs, 0);
>   	}
> +}
> +
> +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> +{
> +	if (i915_is_ggtt(vm))
> +		return i915_vm_to_ggtt(vm)->alias;
> +	else
> +		return i915_vm_to_ppgtt(vm);
> +}
> +
> +static void gen8_init_reg_state(u32 * const regs,
> +				struct intel_context *ce,
> +				struct intel_engine_cs *engine,
> +				struct intel_ring *ring)
> +{
> +	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
> +	const bool rcs = engine->class == RENDER_CLASS;
> +	const u32 base = engine->mmio_base;
> +
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> +				 MI_LRI_FORCE_POSTED;
> +
> +	init_common_reg_state(regs, ppgtt, engine, ring);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> +	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
>   
>   	if (rcs) {
>   		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> @@ -3227,6 +3257,58 @@ static void execlists_init_reg_state(u32 *regs,
>   		regs[CTX_END] |= BIT(0);
>   }
>   
> +static void gen12_init_reg_state(u32 * const regs,
> +				 struct intel_context *ce,
> +				 struct intel_engine_cs *engine,
> +				 struct intel_ring *ring)
> +{
> +	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);

Do we expect appgtt gen12+? If not we could use i915_vm_to_ppgtt() directly

> +	const bool rcs = engine->class == RENDER_CLASS;
> +	const u32 base = engine->mmio_base;
> +
> +	GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> +
> +	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
> +		MI_LRI_FORCE_POSTED;
> +
> +	init_common_reg_state(regs, ppgtt, engine, ring);
> +	if (rcs)
> +		init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
> +
> +	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> +		MI_LRI_FORCE_POSTED;
> +
> +	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> +	init_ppgtt_reg_state(regs, base, ppgtt);
> +
> +	if (rcs) {
> +		regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
> +		CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> +			0);
> +
> +		/* TODO: oa_init_reg_state ? */
> +	}
> +}
> +
> +static void execlists_init_reg_state(u32 *regs,
> +				     struct intel_context *ce,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring)
> +{
> +	/* A context is actually a big batch buffer with several
> +	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> +	 * values we are setting here are only for the first context restore:
> +	 * on a subsequent save, the GPU will recreate this batchbuffer with new
> +	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> +	 * we are not initializing here).
> +	 */
> +	if (INTEL_GEN(engine->i915) >= 12)
> +		gen12_init_reg_state(regs, ce, engine, ring);
> +	else
> +		gen8_init_reg_state(regs, ce, engine, ring);
> +}
> +
>   static int
>   populate_lr_context(struct intel_context *ce,
>   		    struct drm_i915_gem_object *ctx_obj,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index dc0252e0589e..a4bde38e35d8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -49,6 +49,8 @@ struct intel_engine_cs;
>   
>   #define	  EL_CTRL_LOAD				(1 << 0)
>   
> +#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine)	_MMIO((engine)->mmio_base + 0x2b4)
> +

As I commented on the previous revision, defining this in this patch 
without using it or setting it in the context doesn't make much sense.

>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
>    * wraps to 0."
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index b8f20ad71169..8cb6655744cd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -9,7 +9,7 @@
>   
>   #include <linux/types.h>
>   
> -/* GEN8+ Reg State Context */
> +/* GEN8 to GEN11 Reg State Context */
>   #define CTX_LRI_HEADER_0		0x01
>   #define CTX_CONTEXT_CONTROL		0x02
>   #define CTX_RING_HEAD			0x04
> @@ -39,6 +39,11 @@
>   #define CTX_R_PWR_CLK_STATE		0x42
>   #define CTX_END				0x44
>   
> +/* GEN12+ Reg State Context */
> +#define GEN12_CTX_BB_PER_CTX_PTR		0x12
> +#define GEN12_CTX_LRI_HEADER_3			0x41

Gen11 has the same number and positioning of LRIs as gen12, so marking 
this as a gen12 change doesn't feel too reflective of teh HW changes 
IMO, but not a blocker.


> +#define GEN12_CTX_R_PWR_CLK_STATE		0x42

This hasn't moved compared to gen11:
      #define CTX_R_PWR_CLK_STATE             0x42

so no need for a different defs and all those ifs higher up in the patch.

Daniele

> +
>   #define CTX_REG(reg_state, pos, reg, val) do { \
>   	u32 *reg_state__ = (reg_state); \
>   	const u32 pos__ = (pos); \



More information about the Intel-gfx mailing list