[Intel-gfx] [PATCH 2/2] drm/i915: Consolidate TLB invalidation flow
Andrzej Hajda
andrzej.hajda at intel.com
Wed Jan 4 08:46:03 UTC 2023
On 03.01.2023 20:57, Matt Roper wrote:
> On Mon, Dec 19, 2022 at 05:10:02PM +0100, Andrzej Hajda wrote:
>> On 19.12.2022 11:13, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>>
>>> As the logic for selecting the register and corresponsing values grew, the
>>
>> corresponding
>>
>>> code became a bit unsightly. Consolidate by storing the required values at
>>> engine init time in the engine itself, and by doing so minimise the amount
>>> of invariant platform and engine checks during each and every TLB
>>> invalidation.
>>>
>>> v2:
>>> * Fail engine probe if TLB invalidation registers are unknown.
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> Cc: Andrzej Hajda <andrzej.hajda at intel.com>
>>> Cc: Matt Roper <matthew.d.roper at intel.com>
>>> Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com> # v1
>>> ---
>>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 93 +++++++++++++
>>> drivers/gpu/drm/i915/gt/intel_engine_types.h | 15 +++
>>> drivers/gpu/drm/i915/gt/intel_gt.c | 135 +++----------------
>>> 3 files changed, 128 insertions(+), 115 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> index 99c4b866addd..d47dadfc25c8 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> @@ -1143,12 +1143,105 @@ static int init_status_page(struct intel_engine_cs *engine)
>>> return ret;
>>> }
>>> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
>>> +{
>>> + static const union intel_engine_tlb_inv_reg gen8_regs[] = {
>>> + [RENDER_CLASS].reg = GEN8_RTCR,
>>> + [VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, /* , GEN8_M2TCR */
>>> + [VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR,
>>> + [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
>>> + };
>>> + static const union intel_engine_tlb_inv_reg gen12_regs[] = {
>>> + [RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR,
>>> + [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
>>> + [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
>>> + [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
>>> + [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
>>> + };
>>> + static const union intel_engine_tlb_inv_reg xehp_regs[] = {
>>> + [RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR,
>>> + [VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR,
>>> + [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
>>> + [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR,
>>> + [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR,
>>> + };
>>> + struct drm_i915_private *i915 = engine->i915;
>>> + const union intel_engine_tlb_inv_reg *regs;
>>> + union intel_engine_tlb_inv_reg reg;
>>> + unsigned int class = engine->class;
>>> + unsigned int num = 0;
>>> + u32 val;
>>> +
>>> + /*
>>> + * New platforms should not be added with catch-all-newer (>=)
>>> + * condition so that any later platform added triggers the below warning
>>> + * and in turn mandates a human cross-check of whether the invalidation
>>> + * flows have compatible semantics.
>>> + *
>>> + * For instance with the 11.00 -> 12.00 transition three out of five
>>> + * respective engine registers were moved to masked type. Then after the
>>> + * 12.00 -> 12.50 transition multi cast handling is required too.
>>> + */
>>> +
>>> + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50)) {
>
> This is bad...it only captures XEHPSDV and breaks the handling of DG2
> (12.55), PVC (12.60), and MTL (12.70, 12.71, and 12.72). You're not
> hitting the warning as expected since those are all now being captured
> by the next case of the if/else ladder. With the way GMD_ID works, we
> may also get new version numbers that silently show up in hardware too
> at some point (e.g., 12.73, 12.74, etc.)
>
>>> + regs = xehp_regs;
>>> + num = ARRAY_SIZE(xehp_regs);
>>> + } else if (GRAPHICS_VER(i915) == 12) {
>
> You'd want to change this to
>
> GRAPHICS_VER_FULL(i915) == IP_VER(12, 0)
>
> to get the behavior you expected.
According to dg1_info, DG1 has IP_VER(12, 10), so it would not fit into this
bucket.
Regards
Andrzej
>
>
> Matt
>
>>> + regs = gen12_regs;
>>> + num = ARRAY_SIZE(gen12_regs);
>>> + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
>>> + regs = gen8_regs;
>>> + num = ARRAY_SIZE(gen8_regs);
>>> + } else if (GRAPHICS_VER(i915) < 8) {
>>> + return 0;
>>> + }
>>> +
>>> + if (drm_WARN_ONCE(&i915->drm, !num,
>>> + "Platform does not implement TLB invalidation!"))
>>> + return -ENODEV;
>>> +
>>> + if (drm_WARN_ON_ONCE(&i915->drm,
>>> + class >= num ||
>>> + (!regs[class].reg.reg &&
>>> + !regs[class].mcr_reg.reg)))
>>> + return -ERANGE;
>>
>> I hope the propagation of -ERANGE to device probe is OK.
>>
>> Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com>
>>
>> Regards
>> Andrzej
>>
>>> +
>>> + reg = regs[class];
>>> +
>>> + if (GRAPHICS_VER(i915) == 8 && class == VIDEO_DECODE_CLASS) {
>>> + reg.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
>>> + val = 0;
>>> + } else {
>>> + val = engine->instance;
>>> + }
>>> +
>>> + val = BIT(val);
>>> +
>>> + engine->tlb_inv.mcr = regs == xehp_regs;
>>> + engine->tlb_inv.reg = reg;
>>> + engine->tlb_inv.done = val;
>>> +
>>> + if (GRAPHICS_VER(i915) >= 12 &&
>>> + (engine->class == VIDEO_DECODE_CLASS ||
>>> + engine->class == VIDEO_ENHANCEMENT_CLASS ||
>>> + engine->class == COMPUTE_CLASS))
>>> + engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
>>> + else
>>> + engine->tlb_inv.request = val;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int engine_setup_common(struct intel_engine_cs *engine)
>>> {
>>> int err;
>>> init_llist_head(&engine->barrier_tasks);
>>> + err = intel_engine_init_tlb_invalidation(engine);
>>> + if (err)
>>> + return err;
>>> +
>>> err = init_status_page(engine);
>>> if (err)
>>> return err;
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> index 4fd54fb8810f..8c661fe89314 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> @@ -341,6 +341,19 @@ struct intel_engine_guc_stats {
>>> u64 start_gt_clk;
>>> };
>>> +union intel_engine_tlb_inv_reg {
>>> + i915_reg_t reg;
>>> + i915_mcr_reg_t mcr_reg;
>>> +};
>>> +
>>> +struct intel_engine_tlb_inv
>>> +{
>>> + bool mcr;
>>> + union intel_engine_tlb_inv_reg reg;
>>> + u32 request;
>>> + u32 done;
>>> +};
>>> +
>>> struct intel_engine_cs {
>>> struct drm_i915_private *i915;
>>> struct intel_gt *gt;
>>> @@ -372,6 +385,8 @@ struct intel_engine_cs {
>>> u32 context_size;
>>> u32 mmio_base;
>>> + struct intel_engine_tlb_inv tlb_inv;
>>> +
>>> /*
>>> * Some w/a require forcewake to be held (which prevents RC6) while
>>> * a particular engine is active. If so, we set fw_domain to which
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
>>> index 854841a731cb..9fb0ac03f51a 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>> @@ -983,36 +983,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
>>> intel_sseu_dump(&info->sseu, p);
>>> }
>>> -struct reg_and_bit {
>>> - union {
>>> - i915_reg_t reg;
>>> - i915_mcr_reg_t mcr_reg;
>>> - };
>>> - u32 bit;
>>> -};
>>> -
>>> -static struct reg_and_bit
>>> -get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>>> - const i915_reg_t *regs, const unsigned int num)
>>> -{
>>> - const unsigned int class = engine->class;
>>> - struct reg_and_bit rb = { };
>>> -
>>> - if (drm_WARN_ON_ONCE(&engine->i915->drm,
>>> - class >= num || !regs[class].reg))
>>> - return rb;
>>> -
>>> - rb.reg = regs[class];
>>> - if (gen8 && class == VIDEO_DECODE_CLASS)
>>> - rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
>>> - else
>>> - rb.bit = engine->instance;
>>> -
>>> - rb.bit = BIT(rb.bit);
>>> -
>>> - return rb;
>>> -}
>>> -
>>> /*
>>> * HW architecture suggest typical invalidation time at 40us,
>>> * with pessimistic cases up to 100us and a recommendation to
>>> @@ -1026,14 +996,20 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>>> * but are now considered MCR registers. Since they exist within a GAM range,
>>> * the primary instance of the register rolls up the status from each unit.
>>> */
>>> -static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>>> +static int wait_for_invalidate(struct intel_engine_cs *engine)
>>> {
>>> - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
>>> - return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
>>> + if (engine->tlb_inv.mcr)
>>> + return intel_gt_mcr_wait_for_reg(engine->gt,
>>> + engine->tlb_inv.reg.mcr_reg,
>>> + engine->tlb_inv.done,
>>> + 0,
>>> TLB_INVAL_TIMEOUT_US,
>>> TLB_INVAL_TIMEOUT_MS);
>>> else
>>> - return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
>>> + return __intel_wait_for_register_fw(engine->gt->uncore,
>>> + engine->tlb_inv.reg.reg,
>>> + engine->tlb_inv.done,
>>> + 0,
>>> TLB_INVAL_TIMEOUT_US,
>>> TLB_INVAL_TIMEOUT_MS,
>>> NULL);
>>> @@ -1041,61 +1017,14 @@ static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
>>> static void mmio_invalidate_full(struct intel_gt *gt)
>>> {
>>> - static const i915_reg_t gen8_regs[] = {
>>> - [RENDER_CLASS] = GEN8_RTCR,
>>> - [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
>>> - [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
>>> - [COPY_ENGINE_CLASS] = GEN8_BTCR,
>>> - };
>>> - static const i915_reg_t gen12_regs[] = {
>>> - [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
>>> - [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
>>> - [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
>>> - [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
>>> - [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
>>> - };
>>> - static const i915_mcr_reg_t xehp_regs[] = {
>>> - [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
>>> - [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
>>> - [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
>>> - [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
>>> - [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
>>> - };
>>> struct drm_i915_private *i915 = gt->i915;
>>> struct intel_uncore *uncore = gt->uncore;
>>> struct intel_engine_cs *engine;
>>> intel_engine_mask_t awake, tmp;
>>> enum intel_engine_id id;
>>> - const i915_reg_t *regs;
>>> - unsigned int num = 0;
>>> unsigned long flags;
>>> - /*
>>> - * New platforms should not be added with catch-all-newer (>=)
>>> - * condition so that any later platform added triggers the below warning
>>> - * and in turn mandates a human cross-check of whether the invalidation
>>> - * flows have compatible semantics.
>>> - *
>>> - * For instance with the 11.00 -> 12.00 transition three out of five
>>> - * respective engine registers were moved to masked type. Then after the
>>> - * 12.00 -> 12.50 transition multi cast handling is required too.
>>> - */
>>> -
>>> - if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50)) {
>>> - regs = NULL;
>>> - num = ARRAY_SIZE(xehp_regs);
>>> - } else if (GRAPHICS_VER(i915) == 12) {
>>> - regs = gen12_regs;
>>> - num = ARRAY_SIZE(gen12_regs);
>>> - } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
>>> - regs = gen8_regs;
>>> - num = ARRAY_SIZE(gen8_regs);
>>> - } else if (GRAPHICS_VER(i915) < 8) {
>>> - return;
>>> - }
>>> -
>>> - if (drm_WARN_ONCE(&i915->drm, !num,
>>> - "Platform does not implement TLB invalidation!"))
>>> + if (GRAPHICS_VER(i915) < 8)
>>> return;
>>> intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>>> @@ -1105,33 +1034,18 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>> awake = 0;
>>> for_each_engine(engine, gt, id) {
>>> - struct reg_and_bit rb;
>>> -
>>> if (!intel_engine_pm_is_awake(engine))
>>> continue;
>>> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
>>> - u32 val = BIT(engine->instance);
>>> -
>>> - if (engine->class == VIDEO_DECODE_CLASS ||
>>> - engine->class == VIDEO_ENHANCEMENT_CLASS ||
>>> - engine->class == COMPUTE_CLASS)
>>> - val = _MASKED_BIT_ENABLE(val);
>>> + if (engine->tlb_inv.mcr)
>>> intel_gt_mcr_multicast_write_fw(gt,
>>> - xehp_regs[engine->class],
>>> - val);
>>> - } else {
>>> - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
>>> - if (!i915_mmio_reg_offset(rb.reg))
>>> - continue;
>>> -
>>> - if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
>>> - engine->class == VIDEO_ENHANCEMENT_CLASS ||
>>> - engine->class == COMPUTE_CLASS))
>>> - rb.bit = _MASKED_BIT_ENABLE(rb.bit);
>>> -
>>> - intel_uncore_write_fw(uncore, rb.reg, rb.bit);
>>> - }
>>> + engine->tlb_inv.reg.mcr_reg,
>>> + engine->tlb_inv.request);
>>> + else
>>> + intel_uncore_write_fw(uncore,
>>> + engine->tlb_inv.reg.reg,
>>> + engine->tlb_inv.request);
>>> +
>>> awake |= engine->mask;
>>> }
>>> @@ -1150,16 +1064,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>>> intel_gt_mcr_unlock(gt, flags);
>>> for_each_engine_masked(engine, gt, awake, tmp) {
>>> - struct reg_and_bit rb;
>>> -
>>> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
>>> - rb.mcr_reg = xehp_regs[engine->class];
>>> - rb.bit = BIT(engine->instance);
>>> - } else {
>>> - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
>>> - }
>>> -
>>> - if (wait_for_invalidate(gt, rb))
>>> + if (wait_for_invalidate(engine))
>>> drm_err_ratelimited(&gt->i915->drm,
>>> "%s TLB invalidation did not complete in %ums!\n",
>>> engine->name, TLB_INVAL_TIMEOUT_MS);
>>
>
More information about the Intel-gfx
mailing list