[Intel-gfx] [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 26 14:34:21 UTC 2017
Quoting Mika Kuoppala (2017-10-26 15:01:44)
> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
>
> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck.
> The reason for naming is most likely that workaround was named before
> the most recent recommendation evolved to use the reserve bit.
>
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
>
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
>
> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_reg.h | 5 +-
> drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++---
> 2 files changed, 108 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f138eae82bf0..c04c634af5ae 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
> #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88)
> #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84)
> #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044)
> -#define FORCEWAKE_KERNEL 0x1
> -#define FORCEWAKE_USER 0x2
> +#define FORCEWAKE_KERNEL BIT(0)
> +#define FORCEWAKE_USER BIT(1)
> +#define FORCEWAKE_RESERVE BIT(12)
Why 12? How about 15?
FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK
Reserved tends to imply future hw bits. So I think s/reserve/fallback/
works throughout the patch.
> #define FORCEWAKE_MT_ACK _MMIO(0x130040)
> #define ECOBUS _MMIO(0xa180)
> #define FORCEWAKE_MT_ENABLE (1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 20e3c65c0999..fc6d090244c5 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
> HRTIMER_MODE_REL);
> }
>
> +static inline bool
> +wait_ack_clear(const struct drm_i915_private *i915,
> + const struct intel_uncore_forcewake_domain *d,
> + const u32 ack)
> +{
> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0,
> + FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +static inline bool
> +wait_ack_set(const struct drm_i915_private *i915,
> + const struct intel_uncore_forcewake_domain *d,
> + const u32 ack)
> +{
> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack),
> + FORCEWAKE_ACK_TIMEOUT_MS);
> +}
Let's make these one function, to cut down on the wait_for() expansions.
Keeping the wait_ack_clear/wait_ack_set wrappers.
static inline int
__wait_for_ack(i915, d, ack, value)
{
return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value,
FORCEWAKE_ACK_TIMEOUT_MS);
}
static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0));
static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack));
> static inline void
> fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
> const struct intel_uncore_forcewake_domain *d)
> {
> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> - FORCEWAKE_KERNEL) == 0,
> - FORCEWAKE_ACK_TIMEOUT_MS))
> + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
> DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
> intel_uncore_forcewake_domain_to_str(d->id));
> }
>
> +enum ack_type {
> + ACK_CLEAR = 0,
> + ACK_SET
> +};
> +
> +static bool
> +fw_domain_reserve_fallback(const struct drm_i915_private *i915,
> + const struct intel_uncore_forcewake_domain *d,
> + const enum ack_type type)
> +{
> + bool timeout;
> + int retry = 10;
> +
> + /* Fallback to toggle another fw bit to wake up the gpu */
The comment needs some work; the first bit itself was meant to wake the
gpu, so why is this bit any more likely to work?
> + do {
> + wait_ack_clear(i915, d, FORCEWAKE_RESERVE);
> + __raw_i915_write32(i915, d->reg_set,
> + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE));
> + wait_ack_set(i915, d, FORCEWAKE_RESERVE);
> +
> + if (type == ACK_CLEAR)
> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
> + else
> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
> +
> + __raw_i915_write32(i915, d->reg_set,
> + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE));
Blindly toggling another bit to encourage the first to succeed.
Why stop at 1 extra bit????
If the wait_ack_set(RSVD) fails, you might as well abort?
Other than my face hitting the desk, the logic makes sense to me:
Assert second fw bit; check original bit; clear second fw bit (so we
don't keep fw longer than required).
This basically also has the side-effect of making the timeout 12x
longer.
> + } while (timeout && --retry);
> +
> + return timeout;
> +}
> +
> +static inline void
> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915,
> + const struct intel_uncore_forcewake_domain *d)
> +{
> + bool timeout;
bool err; or none at all.
timeout tends to be a value we pass around telling us how long until the
timeout; not the status, which would be timedout.
> +
> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
> + if (likely(!timeout))
> + return;
> +
> + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR);
> + if (timeout)
> + fw_domain_wait_ack_clear(i915, d);
Ok, one last try and raise an error if it fails.
> +}
> +
> static inline void
> fw_domain_get(struct drm_i915_private *i915,
> const struct intel_uncore_forcewake_domain *d)
> @@ -91,14 +154,27 @@ static inline void
> fw_domain_wait_ack(const struct drm_i915_private *i915,
> const struct intel_uncore_forcewake_domain *d)
> {
> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> - FORCEWAKE_KERNEL),
> - FORCEWAKE_ACK_TIMEOUT_MS))
> + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
> DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
> intel_uncore_forcewake_domain_to_str(d->id));
> }
>
> static inline void
> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915,
> + const struct intel_uncore_forcewake_domain *d)
> +{
> + bool timeout;
> +
> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
> + if (likely(!timeout))
> + return;
> +
> + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET);
> + if (timeout)
> + fw_domain_wait_ack(i915, d);
> +}
> +
> +static inline void
> fw_domain_put(const struct drm_i915_private *i915,
> const struct intel_uncore_forcewake_domain *d)
> {
> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
> }
>
> static void
> +fw_domains_get_with_reserve(struct drm_i915_private *i915,
> + enum forcewake_domains fw_domains)
> +{
> + struct intel_uncore_forcewake_domain *d;
> + unsigned int tmp;
> +
> + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
> +
> + for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
> + fw_domain_wait_ack_clear_reserve(i915, d);
> + fw_domain_get(i915, d);
> + }
> +
> + for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> + fw_domain_wait_ack_set_reserve(i915, d);
> +
> + i915->uncore.fw_domains_active |= fw_domains;
Ok.
> +}
> +
> +static void
> fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
> {
> struct intel_uncore_forcewake_domain *d;
> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
> }
>
> if (INTEL_GEN(dev_priv) >= 9) {
> - dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
> + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */
> + dev_priv->uncore.funcs.force_wake_get =
> + fw_domains_get_with_reserve;
> dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
> fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
> FORCEWAKE_RENDER_GEN9,
> --
> 2.11.0
>
More information about the Intel-gfx
mailing list