[Intel-gfx] [PATCH 3/6] drm/i915: Add uncore mmio api for per-context registers
Chris Wilson
chris at chris-wilson.co.uk
Thu Feb 23 15:35:04 UTC 2017
On Wed, Feb 22, 2017 at 04:36:31PM +0000, Robert Bragg wrote:
> Since the exponent for periodic OA counter sampling is maintained in a
> per-context register while we want to treat it as if it were global
> state we need to be able to safely issue an mmio write to a per-context
> register and affect any currently running context.
>
> We have to take some extra care in this case and this adds a utility
> api to encapsulate what's required.
Been waying up the pros/cons of having this in uncore. It doesn't
attempt to be generic nor replace the existing instance, so atm I think
it might be better as local to i915_perf.
> Signed-off-by: Robert Bragg <robert at sixbynine.org>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 ++
> drivers/gpu/drm/i915/i915_reg.h | 3 ++
> drivers/gpu/drm/i915/intel_uncore.c | 73 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 80 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 105b97bd34d7..c8d03a2e89cc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -718,6 +718,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> i915_reg_t reg, unsigned int op);
>
> struct intel_uncore_funcs {
> + int (*wait_for_rcs_busy)(struct drm_i915_private *dev_priv);
> void (*force_wake_get)(struct drm_i915_private *dev_priv,
> enum forcewake_domains domains);
> void (*force_wake_put)(struct drm_i915_private *dev_priv,
> @@ -751,6 +752,7 @@ struct intel_uncore {
>
> struct intel_uncore_funcs funcs;
>
> + atomic_t hold_rcs_busy_count;
> unsigned fifo_count;
>
> enum forcewake_domains fw_domains;
> @@ -3139,6 +3141,8 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
> {
> return dev_priv->vgpu.active;
> }
> +int intel_uncore_begin_ctx_mmio(struct drm_i915_private *dev_priv);
> +void intel_uncore_end_ctx_mmio(struct drm_i915_private *dev_priv);
>
> void
> i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 141a5c1e3895..94d40e82edc1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2313,6 +2313,9 @@ enum skl_disp_power_wells {
> #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
> #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10)
>
> +#define GEN6_RCS_PWR_FSM _MMIO(0x22ac)
> +#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
> +
> /* Fuse readout registers for GT */
> #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
> #define CHV_FGT_DISABLE_SS0 (1 << 10)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 441c51fd9746..06bfe5f89ac5 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1274,6 +1274,16 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
> WARN_ON(dev_priv->uncore.fw_domains == 0);
> }
>
> +static int gen8_wait_for_rcs_busy(struct drm_i915_private *dev_priv)
> +{
> + return wait_for((I915_READ(GEN6_RCS_PWR_FSM) & 0x3f) == 0x30, 50);
> +}
> +
> +static int gen9_wait_for_rcs_busy(struct drm_i915_private *dev_priv)
> +{
> + return wait_for((I915_READ(GEN9_RCS_FE_FSM2) & 0x3f) == 0x30, 50);
> +}
> +
> #define ASSIGN_FW_DOMAINS_TABLE(d) \
> { \
> dev_priv->uncore.fw_domains_table = \
> @@ -1305,6 +1315,8 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
> dev_priv->uncore.funcs.mmio_writel =
> gen9_decoupled_write32;
> }
> + dev_priv->uncore.funcs.wait_for_rcs_busy =
> + gen9_wait_for_rcs_busy;
> break;
> case 8:
> if (IS_CHERRYVIEW(dev_priv)) {
> @@ -1316,6 +1328,8 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
> ASSIGN_WRITE_MMIO_VFUNCS(gen8);
> ASSIGN_READ_MMIO_VFUNCS(gen6);
> }
> + dev_priv->uncore.funcs.wait_for_rcs_busy =
> + gen8_wait_for_rcs_busy;
> break;
> case 7:
> case 6:
> @@ -1858,6 +1872,65 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> return fw_domains;
> }
>
> +static int hold_rcs_busy(struct drm_i915_private *dev_priv)
> +{
> + int ret = 0;
> +
> + if (atomic_inc_and_test(&dev_priv->uncore.hold_rcs_busy_count)) {
> + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> + _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
> +
> + ret = dev_priv->uncore.funcs.wait_for_rcs_busy(dev_priv);
> + }
> +
> + return ret;
> +}
> +
> +static void release_rcs_busy(struct drm_i915_private *dev_priv)
> +{
> + if (!atomic_dec_and_test(&dev_priv->uncore.hold_rcs_busy_count)) {
> + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> + _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
> + }
atomic_inc_and_test is correct, but atomic_dec_and_test is backwards.
Besides if you just extend the irq spinlock slightly, you get:
+int intel_uncore_begin_ctx_mmio(struct drm_i915_private *dev_priv)
+{
+ unsigned long flags;
+ i915_reg_t reg;
+ int ret;
+
+ if (INTEL_GEN(dev_priv) < 8)
+ return 0;
+
+ spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+ __intel_uncore_forcewake_get(dev_priv, FORCEWAKE_RENDER);
+
+ if (!dev_priv->uncore.rcs_hold_count++)
+ I915_WRITE_FW(GEN6_RC_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+
+ if (INTEL_GEN(dev_priv) == 8)
+ reg = GEN6_RCS_PWR_FSM;
+ else
+ reg = GEN9_RCS_FE_FSM2;
+
+ ret = wait_for_atomic((__raw_i915_read32(dev_priv, reg) & 0x3f) == 0x30,
+ 50);
+ if (ret)
+ __intel_uncore_forcewake_put(dev_priv, FORCEWAKE_RENDER);
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
+
+ return ret;
+}
+
+void intel_uncore_end_ctx_mmio(struct drm_i915_private *dev_priv)
+{
+ unsigned long flags;
+
+ if (INTEL_GEN(dev_priv) < 8)
+ return;
+
+ spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+
+ if (!--dev_priv->uncore.rcs_hold_count)
+ I915_WRITE_FW(GEN6_RC_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+ __intel_uncore_forcewake_put(dev_priv, FORCEWAKE_RENDER);
+
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
+}
Note as well you need to think about serialising the wakeup, not just
the hold count.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list