[Intel-gfx] [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri Oct 27 06:34:12 UTC 2017


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2017-10-26 15:01:44)
>> There is a possibility on gen9 hardware to miss the forcewake ack
>> message. The recommended workaround is to use another free
>> bit and toggle it until original bit is successfully acknowledged.
>> 
>> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck.
>> The reason for naming is most likely that workaround was named before
>> the most recent recommendation evolved to use the reserve bit.
>> 
>> Some future gen9 revs might or might not fix the underlying issue but
>> the fallback to reserve bit dance can be considered as harmless:
>> without the ack timeout we never reach the reserve bit forcewake.
>> Thus as of now we adopt a blanket approach for all gen9 and leave
>> the bypassing the reserve bit approach for future patches if
>> corresponding hw revisions do appear.
>> 
>> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
>> for forcewake request/clear ack") did increase the forcewake timeout.
>> If the issue was a delayed ack, future work could include finding
>> a suitable timeout value both for primary ack and reserve toggle
>> to reduce the worst case latency.
>> 
>> References: HSDES #1604254524
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble at intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>>  drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++---
>>  2 files changed, 108 insertions(+), 9 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index f138eae82bf0..c04c634af5ae 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7774,8 +7774,9 @@ enum {
>>  #define  FORCEWAKE_ACK_MEDIA_GEN9              _MMIO(0x0D88)
>>  #define  FORCEWAKE_ACK_RENDER_GEN9             _MMIO(0x0D84)
>>  #define  FORCEWAKE_ACK_BLITTER_GEN9            _MMIO(0x130044)
>> -#define   FORCEWAKE_KERNEL                     0x1
>> -#define   FORCEWAKE_USER                       0x2
>> +#define   FORCEWAKE_KERNEL                     BIT(0)
>> +#define   FORCEWAKE_USER                       BIT(1)
>> +#define   FORCEWAKE_RESERVE                    BIT(12)
>
> Why 12? How about 15?
>

I just mimiced the pseudocode from an errata sheet slavishly.

AFAIK 15 should work as well as 12 so 15.

> FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK

KERNEL2 would imply similar mechanism. Fallback I like more
as this is just under the hood toggling for waking up
the real bit.

>
> Reserved tends to imply future hw bits. So I think s/reserve/fallback/
> works throughout the patch.
>

Yup, fallback is better.

>>  #define  FORCEWAKE_MT_ACK                      _MMIO(0x130040)
>>  #define  ECOBUS                                        _MMIO(0xa180)
>>  #define    FORCEWAKE_MT_ENABLE                 (1<<5)
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 20e3c65c0999..fc6d090244c5 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>>                                HRTIMER_MODE_REL);
>>  }
>>  
>> +static inline bool
>> +wait_ack_clear(const struct drm_i915_private *i915,
>> +              const struct intel_uncore_forcewake_domain *d,
>> +              const u32 ack)
>> +{
>> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0,
>> +                              FORCEWAKE_ACK_TIMEOUT_MS);
>> +}
>> +
>> +static inline bool
>> +wait_ack_set(const struct drm_i915_private *i915,
>> +            const struct intel_uncore_forcewake_domain *d,
>> +            const u32 ack)
>> +{
>> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack),
>> +                              FORCEWAKE_ACK_TIMEOUT_MS);
>> +}
>
> Let's make these one function, to cut down on the wait_for() expansions.
>
> Keeping the wait_ack_clear/wait_ack_set wrappers.
>
> static inline int
> __wait_for_ack(i915, d, ack, value)
> {
> 	return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value,
> 				FORCEWAKE_ACK_TIMEOUT_MS);
> }
>
> static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0));
> static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack));
>
>>  static inline void
>>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>>                          const struct intel_uncore_forcewake_domain *d)
>>  {
>> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
>> -                            FORCEWAKE_KERNEL) == 0,
>> -                           FORCEWAKE_ACK_TIMEOUT_MS))
>> +       if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>>                 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>>                           intel_uncore_forcewake_domain_to_str(d->id));
>>  }
>>  
>> +enum ack_type {
>> +       ACK_CLEAR = 0,
>> +       ACK_SET
>> +};
>> +
>> +static bool
>> +fw_domain_reserve_fallback(const struct drm_i915_private *i915,
>> +                          const struct intel_uncore_forcewake_domain *d,
>> +                          const enum ack_type type)
>> +{
>> +       bool timeout;
>> +       int retry = 10;
>> +
>> +       /* Fallback to toggle another fw bit to wake up the gpu */
>
> The comment needs some work; the first bit itself was meant to wake the
> gpu, so why is this bit any more likely to work?
>

Yeah, this needs rework. It is more like toggling an unrelated bit
will make the 'lost or pending' ack message for the KERNEL bit to
materialize.

I thought that should we also retoggle the KERNEL one but
again, this patch mimics the pseudocode I found 1:1. There
are hints that both delaying wait and a regtogging of the
original bit was used at some point of time as a workaround.

This is _supposed_ to be the latest recommendation to combat
the issue. Efforts and success to reproduce would give us
venue to measure if it is the best known.

>> +       do {
>> +               wait_ack_clear(i915, d, FORCEWAKE_RESERVE);
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE));
>> +               wait_ack_set(i915, d, FORCEWAKE_RESERVE);
>> +
>> +               if (type == ACK_CLEAR)
>> +                       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
>> +               else
>> +                       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
>> +
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE));
>
> Blindly toggling another bit to encourage the first to succeed.
>
> Why stop at 1 extra bit????

Again 1:1 mimic of pseudocode.

What we could do instead of this toggle hammering, is to go
through all unreserved bits until one ack appears on any and
be happy.

>
> If the wait_ack_set(RSVD) fails, you might as well abort?
>

For me it looks like this toggle hammering will kick
the state machine to spit out the first ack message
that was somehow lost. I did again follow the
recomennded procedure precisely.

> Other than my face hitting the desk, the logic makes sense to me:
>
> 	Assert second fw bit; check original bit; clear second fw bit (so we
> 	don't keep fw longer than required).
>
> This basically also has the side-effect of making the timeout 12x
> longer.
>

In the worst case yes. But the first toggle should already untangle
the mess. Probability that we end up retrying all the way through
is very unlikely. Now that I have said it, it will happen :)

Thank you for the comments.

-Mika

>> +       } while (timeout && --retry);
>> +
>> +       return timeout;
>> +}
>> +
>> +static inline void
>> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915,
>> +                                const struct intel_uncore_forcewake_domain *d)
>> +{
>> +       bool timeout;
> bool err; or none at all.
>
> timeout tends to be a value we pass around telling us how long until the
> timeout; not the status, which would be timedout.
>
>> +
>> +       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
>> +       if (likely(!timeout))
>> +               return;
>> +
>> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR);
>> +       if (timeout)
>> +               fw_domain_wait_ack_clear(i915, d);
>
> Ok, one last try and raise an error if it fails.
>
>> +}
>> +
>>  static inline void
>>  fw_domain_get(struct drm_i915_private *i915,
>>               const struct intel_uncore_forcewake_domain *d)
>> @@ -91,14 +154,27 @@ static inline void
>>  fw_domain_wait_ack(const struct drm_i915_private *i915,
>>                    const struct intel_uncore_forcewake_domain *d)
>>  {
>> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
>> -                            FORCEWAKE_KERNEL),
>> -                           FORCEWAKE_ACK_TIMEOUT_MS))
>> +       if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
>>                 DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
>>                           intel_uncore_forcewake_domain_to_str(d->id));
>>  }
>>  
>>  static inline void
>> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915,
>> +                              const struct intel_uncore_forcewake_domain *d)
>> +{
>> +       bool timeout;
>> +
>> +       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
>> +       if (likely(!timeout))
>> +               return;
>> +
>> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_SET);
>> +       if (timeout)
>> +               fw_domain_wait_ack(i915, d);
>> +}
>> +
>> +static inline void
>>  fw_domain_put(const struct drm_i915_private *i915,
>>               const struct intel_uncore_forcewake_domain *d)
>>  {
>> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>>  }
>>  
>>  static void
>> +fw_domains_get_with_reserve(struct drm_i915_private *i915,
>> +                           enum forcewake_domains fw_domains)
>> +{
>> +       struct intel_uncore_forcewake_domain *d;
>> +       unsigned int tmp;
>> +
>> +       GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
>> +
>> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
>> +               fw_domain_wait_ack_clear_reserve(i915, d);
>> +               fw_domain_get(i915, d);
>> +       }
>> +
>> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp)
>> +               fw_domain_wait_ack_set_reserve(i915, d);
>> +
>> +       i915->uncore.fw_domains_active |= fw_domains;
>
> Ok.
>
>> +}
>> +
>> +static void
>>  fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>>  {
>>         struct intel_uncore_forcewake_domain *d;
>> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>>         }
>>  
>>         if (INTEL_GEN(dev_priv) >= 9) {
>> -               dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
>> +               /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */
>> +               dev_priv->uncore.funcs.force_wake_get =
>> +                       fw_domains_get_with_reserve;
>>                 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>>                 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>>                                FORCEWAKE_RENDER_GEN9,
>> -- 
>> 2.11.0
>> 


More information about the Intel-gfx mailing list