[PATCH v3 09/14] drm/i915/gt: Always use MCR functions on multicast registers
Balasubramani Vivekanandan
balasubramani.vivekanandan at intel.com
Mon Oct 17 16:53:58 UTC 2022
On 14.10.2022 16:02, Matt Roper wrote:
> Rather than relying on the implicit behavior of intel_uncore_*()
> functions, let's always use the intel_gt_mcr_*() functions to operate on
> multicast/replicated registers.
>
> v2:
> - Add TLB invalidation registers
>
> v3:
> - Switch more uncore operations in mmio_invalidate_full() to MCR
> operations for Xe_HP. (Bala)
>
> Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
> Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_gt.c | 58 ++++++++++++++++-------
> drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++---
> drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 +++--
> drivers/gpu/drm/i915/intel_pm.c | 19 ++++----
> 4 files changed, 65 insertions(+), 37 deletions(-)
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan at intel.com>
Regards,
Bala
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index e14f159ad9fc..3df0d0336dbc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1017,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
> return rb;
> }
>
> +/*
> + * HW architecture suggests typical invalidation time at 40us,
> + * with pessimistic cases up to 100us and a recommendation to
> + * cap at 1ms. We go a bit higher just in case.
> + */
> +#define TLB_INVAL_TIMEOUT_US 100
> +#define TLB_INVAL_TIMEOUT_MS 4
> +
> +/*
> + * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
> + * but are now considered MCR registers. Since they exist within a GAM range,
> + * the primary instance of the register rolls up the status from each unit.
> + */
> +static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +{
> + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> + return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
> + TLB_INVAL_TIMEOUT_US,
> + TLB_INVAL_TIMEOUT_MS);
> + else
> + return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
> + TLB_INVAL_TIMEOUT_US,
> + TLB_INVAL_TIMEOUT_MS,
> + NULL);
> +}
> +
> static void mmio_invalidate_full(struct intel_gt *gt)
> {
> static const i915_reg_t gen8_regs[] = {
> @@ -1048,7 +1074,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> unsigned int num = 0;
>
> if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> - regs = xehp_regs;
> + regs = NULL;
> num = ARRAY_SIZE(xehp_regs);
> } else if (GRAPHICS_VER(i915) == 12) {
> regs = gen12_regs;
> @@ -1075,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> if (!intel_engine_pm_is_awake(engine))
> continue;
>
> - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> - if (!i915_mmio_reg_offset(rb.reg))
> - continue;
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + intel_gt_mcr_multicast_write_fw(gt,
> + xehp_regs[engine->class],
> + BIT(engine->instance));
> + } else {
> + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> + if (!i915_mmio_reg_offset(rb.reg))
> + continue;
>
> - intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> + intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> + }
> awake |= engine->mask;
> }
>
> @@ -1099,22 +1131,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
> for_each_engine_masked(engine, gt, awake, tmp) {
> struct reg_and_bit rb;
>
> - /*
> - * HW architecture suggest typical invalidation time at 40us,
> - * with pessimistic cases up to 100us and a recommendation to
> - * cap at 1ms. We go a bit higher just in case.
> - */
> - const unsigned int timeout_us = 100;
> - const unsigned int timeout_ms = 4;
> -
> rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> - if (__intel_wait_for_register_fw(uncore,
> - rb.reg, rb.bit, 0,
> - timeout_us, timeout_ms,
> - NULL))
> +
> + if (wait_for_invalidate(gt, rb))
> drm_err_ratelimited(&gt->i915->drm,
> "%s TLB invalidation did not complete in %ums!\n",
> - engine->name, timeout_ms);
> + engine->name, TLB_INVAL_TIMEOUT_MS);
> }
>
> /*
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index ecfa5baa5e3f..49fdd509527a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -7,6 +7,7 @@
>
> #include "intel_engine.h"
> #include "intel_gt.h"
> +#include "intel_gt_mcr.h"
> #include "intel_gt_regs.h"
> #include "intel_mocs.h"
> #include "intel_ring.h"
> @@ -609,17 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
> 0; \
> i++)
>
> -static void init_l3cc_table(struct intel_uncore *uncore,
> +static void init_l3cc_table(struct intel_gt *gt,
> const struct drm_i915_mocs_table *table)
> {
> unsigned int i;
> u32 l3cc;
>
> for_each_l3cc(l3cc, table, i)
> - if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50))
> - intel_uncore_write_fw(uncore, XEHP_LNCFCMOCS(i), l3cc);
> + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> + intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
> else
> - intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
> + intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
> }
>
> void intel_mocs_init_engine(struct intel_engine_cs *engine)
> @@ -639,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> init_mocs_table(engine, &table);
>
> if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
> - init_l3cc_table(engine->uncore, &table);
> + init_l3cc_table(engine->gt, &table);
> }
>
> static u32 global_mocs_offset(void)
> @@ -675,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
> * memory transactions including guc transactions
> */
> if (flags & HAS_RENDER_L3CC)
> - init_l3cc_table(gt->uncore, &table);
> + init_l3cc_table(gt, &table);
> }
>
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 9229243992c2..5b86b2e286e0 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -10,12 +10,15 @@
> */
>
> #include "gt/intel_gt.h"
> +#include "gt/intel_gt_mcr.h"
> #include "gt/intel_gt_regs.h"
> #include "intel_guc_fw.h"
> #include "i915_drv.h"
>
> -static void guc_prepare_xfer(struct intel_uncore *uncore)
> +static void guc_prepare_xfer(struct intel_gt *gt)
> {
> + struct intel_uncore *uncore = gt->uncore;
> +
> u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
> GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
> GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
> @@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
>
> if (GRAPHICS_VER(uncore->i915) == 9) {
> /* DOP Clock Gating Enable for GuC clocks */
> - intel_uncore_rmw(uncore, GEN8_MISCCPCTL,
> - 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
> + intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
> + GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
> + intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
>
> /* allows for 5us (in 10ns units) before GT can go to RC6 */
> intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
> @@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
> struct intel_uncore *uncore = gt->uncore;
> int ret;
>
> - guc_prepare_xfer(uncore);
> + guc_prepare_xfer(gt);
>
> /*
> * Note that GuC needs the CSS header plus uKernel code to be copied
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 390802245514..cb18e45f6adf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -30,6 +30,8 @@
> #include "display/skl_watermark.h"
>
> #include "gt/intel_engine_regs.h"
> +#include "gt/intel_gt.h"
> +#include "gt/intel_gt_mcr.h"
> #include "gt/intel_gt_regs.h"
>
> #include "i915_drv.h"
> @@ -4321,22 +4323,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> u32 val;
>
> /* WaTempDisableDOPClkGating:bdw */
> - misccpctl = intel_uncore_rmw(&dev_priv->uncore, GEN8_MISCCPCTL, ~GEN8_DOP_CLOCK_GATE_ENABLE,
> - 0);
> + misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
> + ~GEN8_DOP_CLOCK_GATE_ENABLE, 0);
>
> - val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1);
> + val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
> val &= ~L3_PRIO_CREDITS_MASK;
> val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
> val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
> - intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val);
> + intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val);
>
> /*
> * Wait at least 100 clocks before re-enabling clock gating.
> * See the definition of L3SQCREG1 in BSpec.
> */
> - intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1);
> + intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
> udelay(1);
> - intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl);
> + intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl);
> }
>
> static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
> @@ -4496,9 +4498,8 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
> gen9_init_clock_gating(dev_priv);
>
> /* WaDisableDopClockGating:skl */
> - intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL,
> - intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL) &
> - ~GEN8_DOP_CLOCK_GATE_ENABLE);
> + intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
> + GEN8_DOP_CLOCK_GATE_ENABLE, 0);
>
> /* WAC6entrylatency:skl */
> intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
> --
> 2.37.3
>
More information about the dri-devel mailing list