[Intel-gfx] [PATCH] drm/i915/tgl: Register state context definition for Gen12
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Thu Sep 5 16:13:37 UTC 2019
On 9/5/2019 4:34 AM, Mika Kuoppala wrote:
> From: Michel Thierry <michel.thierry at intel.com>
>
> Gen12 has subtle changes in the reg state context offsets (some fields
> are gone, some are in a different location), compared to previous Gens.
>
> The simplest approach seems to be keeping Gen12 (and future platform)
> changes apart from the previous gens, while keeping the registers that
> are contiguous in functions we can reuse.
>
> v2: alias, virtual engine, rpcs, prune unused regs (Mika)
>
> Bspec: 46255
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 194 +++++++++++++++++-------
> drivers/gpu/drm/i915/gt/intel_lrc.h | 2 +
> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 7 +-
> 3 files changed, 146 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 87b7473a6dfb..f44c78a82915 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -810,6 +810,7 @@ static void virtual_update_register_offsets(u32 *regs,
>
> /* Must match execlists_init_reg_state()! */
>
> + /* Common part */
> regs[CTX_CONTEXT_CONTROL] =
> i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
> regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> @@ -820,13 +821,16 @@ static void virtual_update_register_offsets(u32 *regs,
> regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +
> regs[CTX_SECOND_BB_HEAD_U] =
> i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
>
We should do those 3 only on pre-gen12, or at least add a comment saying
that those are not applicable to gen12+ and we rely to follow-up writes
to overwrite them.
> + /* PPGTT part */
> regs[CTX_CTX_TIMESTAMP] =
> i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +
> regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
> regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
> regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> @@ -844,8 +848,12 @@ static void virtual_update_register_offsets(u32 *regs,
> regs[CTX_BB_PER_CTX_PTR] =
> i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
CTX_BB_PER_CTX_PTR and the 2 indirect ctx entries are different on gen12
as well
>
> - regs[CTX_R_PWR_CLK_STATE] =
> - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> + if (INTEL_GEN(engine->i915) < 11)
This should be < 12
> + regs[CTX_R_PWR_CLK_STATE] =
> + i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> + else
> + regs[GEN12_CTX_R_PWR_CLK_STATE] =
> + i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> }
> }
>
> @@ -1747,8 +1755,12 @@ __execlists_update_reg_state(struct intel_context *ce,
>
> /* RPCS */
> if (engine->class == RENDER_CLASS) {
> - regs[CTX_R_PWR_CLK_STATE + 1] =
> - intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> + const u32 rpcs = intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> + const u32 offset = INTEL_GEN(engine->i915) < 11 ?
< 12
> + CTX_R_PWR_CLK_STATE + 1 :
> + GEN12_CTX_R_PWR_CLK_STATE + 1;
> +
> + regs[offset] = rpcs;
>
> i915_oa_init_reg_state(engine, ce, regs);
> }
> @@ -3116,35 +3128,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
> return indirect_ctx_offset;
> }
>
> -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> -{
> - if (i915_is_ggtt(vm))
> - return i915_vm_to_ggtt(vm)->alias;
> - else
> - return i915_vm_to_ppgtt(vm);
> -}
>
> -static void execlists_init_reg_state(u32 *regs,
> - struct intel_context *ce,
> - struct intel_engine_cs *engine,
> - struct intel_ring *ring)
> +static void init_common_reg_state(u32 * const regs,
> + struct i915_ppgtt * const ppgtt,
> + struct intel_engine_cs *engine,
> + struct intel_ring *ring)
> {
> - struct i915_ppgtt *ppgtt = vm_alias(ce->vm);
> - bool rcs = engine->class == RENDER_CLASS;
> - u32 base = engine->mmio_base;
> -
> - /*
> - * A context is actually a big batch buffer with several
> - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> - * values we are setting here are only for the first context restore:
> - * on a subsequent save, the GPU will recreate this batchbuffer with new
> - * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> - * we are not initializing here).
> - *
> - * Must keep consistent with virtual_update_register_offsets().
> - */
> - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> - MI_LRI_FORCE_POSTED;
> + const u32 base = engine->mmio_base;
>
> CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> @@ -3162,38 +3152,44 @@ static void execlists_init_reg_state(u32 *regs,
> CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> - if (rcs) {
> - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> -
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> - RING_INDIRECT_CTX_OFFSET(base), 0);
> - if (wa_ctx->indirect_ctx.size) {
> - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> +}
>
> - regs[CTX_RCS_INDIRECT_CTX + 1] =
> - (ggtt_offset + wa_ctx->indirect_ctx.offset) |
> - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
> +static void init_wa_bb_reg_state(u32 * const regs,
> + struct intel_engine_cs *engine,
> + u32 pos_bb_per_ctx)
> +{
> + struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
> + const u32 base = engine->mmio_base;
> + const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
> + const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
>
> - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
> - intel_lr_indirect_ctx_offset(engine) << 6;
> - }
> + GEM_BUG_ON(engine->class != RENDER_CLASS);
> + CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
> + CTX_REG(regs, pos_indirect_ctx_offset,
> + RING_INDIRECT_CTX_OFFSET(base), 0);
> + if (wa_ctx->indirect_ctx.size) {
> + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>
> - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> - if (wa_ctx->per_ctx.size) {
> - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> + regs[pos_indirect_ctx + 1] =
> + (ggtt_offset + wa_ctx->indirect_ctx.offset) |
> + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
>
> - regs[CTX_BB_PER_CTX_PTR + 1] =
> - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> - }
> + regs[pos_indirect_ctx_offset + 1] =
> + intel_lr_indirect_ctx_offset(engine) << 6;
> }
>
> - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> + CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
> + if (wa_ctx->per_ctx.size) {
> + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>
> - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> + regs[pos_bb_per_ctx + 1] =
> + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
> + }
> +}
> +
> +static void init_ppgtt_reg_state(u32 *regs, u32 base,
> + struct i915_ppgtt *ppgtt)
> +{
> /* PDP values well be assigned later if needed */
> CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> @@ -3216,6 +3212,40 @@ static void execlists_init_reg_state(u32 *regs,
> ASSIGN_CTX_PDP(ppgtt, regs, 1);
> ASSIGN_CTX_PDP(ppgtt, regs, 0);
> }
> +}
> +
> +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
> +{
> + if (i915_is_ggtt(vm))
> + return i915_vm_to_ggtt(vm)->alias;
> + else
> + return i915_vm_to_ppgtt(vm);
> +}
> +
> +static void gen8_init_reg_state(u32 * const regs,
> + struct intel_context *ce,
> + struct intel_engine_cs *engine,
> + struct intel_ring *ring)
> +{
> + struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
> + const bool rcs = engine->class == RENDER_CLASS;
> + const u32 base = engine->mmio_base;
> +
> + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> + MI_LRI_FORCE_POSTED;
> +
> + init_common_reg_state(regs, ppgtt, engine, ring);
> + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> + if (rcs)
> + init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
> +
> + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> +
> + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> + init_ppgtt_reg_state(regs, base, ppgtt);
>
> if (rcs) {
> regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> @@ -3227,6 +3257,58 @@ static void execlists_init_reg_state(u32 *regs,
> regs[CTX_END] |= BIT(0);
> }
>
> +static void gen12_init_reg_state(u32 * const regs,
> + struct intel_context *ce,
> + struct intel_engine_cs *engine,
> + struct intel_ring *ring)
> +{
> + struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
Do we expect appgtt gen12+? If not we could use i915_vm_to_ppgtt() directly
> + const bool rcs = engine->class == RENDER_CLASS;
> + const u32 base = engine->mmio_base;
> +
> + GEM_DEBUG_EXEC(DRM_INFO_ONCE("Using GEN12 Register State Context\n"));
> +
> + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(13) |
> + MI_LRI_FORCE_POSTED;
> +
> + init_common_reg_state(regs, ppgtt, engine, ring);
> + if (rcs)
> + init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
> +
> + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> + MI_LRI_FORCE_POSTED;
> +
> + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> +
> + init_ppgtt_reg_state(regs, base, ppgtt);
> +
> + if (rcs) {
> + regs[GEN12_CTX_LRI_HEADER_3] = MI_LOAD_REGISTER_IMM(1);
> + CTX_REG(regs, GEN12_CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> + 0);
> +
> + /* TODO: oa_init_reg_state ? */
> + }
> +}
> +
> +static void execlists_init_reg_state(u32 *regs,
> + struct intel_context *ce,
> + struct intel_engine_cs *engine,
> + struct intel_ring *ring)
> +{
> + /* A context is actually a big batch buffer with several
> + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
> + * values we are setting here are only for the first context restore:
> + * on a subsequent save, the GPU will recreate this batchbuffer with new
> + * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> + * we are not initializing here).
> + */
> + if (INTEL_GEN(engine->i915) >= 12)
> + gen12_init_reg_state(regs, ce, engine, ring);
> + else
> + gen8_init_reg_state(regs, ce, engine, ring);
> +}
> +
> static int
> populate_lr_context(struct intel_context *ce,
> struct drm_i915_gem_object *ctx_obj,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index dc0252e0589e..a4bde38e35d8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -49,6 +49,8 @@ struct intel_engine_cs;
>
> #define EL_CTRL_LOAD (1 << 0)
>
> +#define GEN12_ENGINE_SEMAPHORE_TOKEN(engine) _MMIO((engine)->mmio_base + 0x2b4)
> +
As I commented on the previous revision, defining this in this patch
without using it or setting it in the context doesn't make much sense.
> /* The docs specify that the write pointer wraps around after 5h, "After status
> * is written out to the last available status QW at offset 5h, this pointer
> * wraps to 0."
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index b8f20ad71169..8cb6655744cd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -9,7 +9,7 @@
>
> #include <linux/types.h>
>
> -/* GEN8+ Reg State Context */
> +/* GEN8 to GEN11 Reg State Context */
> #define CTX_LRI_HEADER_0 0x01
> #define CTX_CONTEXT_CONTROL 0x02
> #define CTX_RING_HEAD 0x04
> @@ -39,6 +39,11 @@
> #define CTX_R_PWR_CLK_STATE 0x42
> #define CTX_END 0x44
>
> +/* GEN12+ Reg State Context */
> +#define GEN12_CTX_BB_PER_CTX_PTR 0x12
> +#define GEN12_CTX_LRI_HEADER_3 0x41
Gen11 has the same number and positioning of LRIs as gen12, so marking
this as a gen12 change doesn't feel too reflective of teh HW changes
IMO, but not a blocker.
> +#define GEN12_CTX_R_PWR_CLK_STATE 0x42
This hasn't moved compared to gen11:
#define CTX_R_PWR_CLK_STATE 0x42
so no need for a different defs and all those ifs higher up in the patch.
Daniele
> +
> #define CTX_REG(reg_state, pos, reg, val) do { \
> u32 *reg_state__ = (reg_state); \
> const u32 pos__ = (pos); \
More information about the Intel-gfx
mailing list