[Intel-gfx] [PATCH] drm/i915/pmu: Work around compiler warnings on some kernel configs

Chris Wilson chris at chris-wilson.co.uk
Wed Mar 14 11:02:30 UTC 2018


Quoting Tvrtko Ursulin (2018-03-14 08:05:35)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> Arnd Bergman reports:
> """
> The conditional spinlock confuses gcc into thinking the 'flags' value
> might contain uninitialized data:
> 
> drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read':
> arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized]
> 
> The code is correct, but it's easy to see how the compiler gets confused
> here. This avoids the problem by pulling the lock outside of the function
> into its only caller.
> """
> 
> On deeper look it seems this is caused by paravirt spinlocks
> implementation when CONFIG_PARAVIRT_DEBUG is set, which by being
> complicated, manages to convince gcc locked parameter can be changed
> externally (impossible).
> 
> Work around it by removing the conditional locking parameters altogether.
> (It was never the most elegant code anyway.)
> 
> Slight penalty we now pay is an additional irqsave spin lock/unlock cycle
> on the event enable path. But since enable is not a fast path, that is
> preferrable to the alternative solution which was doing MMIO under irqsave
> spinlock.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reported-by: Arnd Bergmann <arnd at arndb.de>
> Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout")
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak at intel.com>
> Cc: Jani Nikula <jani.nikula at linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: David Airlie <airlied at linux.ie>
> Cc: intel-gfx at lists.freedesktop.org
> Cc: dri-devel at lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++-------------------
>  1 file changed, 13 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 4bc7aefa9541..11fb76bd3860 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -412,7 +412,7 @@ static u64 __get_rc6(struct drm_i915_private *i915)
>         return val;
>  }
>  
> -static u64 get_rc6(struct drm_i915_private *i915, bool locked)
> +static u64 get_rc6(struct drm_i915_private *i915)
>  {
>  #if IS_ENABLED(CONFIG_PM)
>         unsigned long flags;
> @@ -428,8 +428,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                  * previously.
>                  */
>  
> -               if (!locked)
> -                       spin_lock_irqsave(&i915->pmu.lock, flags);
> +               spin_lock_irqsave(&i915->pmu.lock, flags);
>  
>                 if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
>                         i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> @@ -438,12 +437,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                         val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
>                 }
>  
> -               if (!locked)
> -                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +               spin_unlock_irqrestore(&i915->pmu.lock, flags);
>         } else {
>                 struct pci_dev *pdev = i915->drm.pdev;
>                 struct device *kdev = &pdev->dev;
> -               unsigned long flags2;
>  
>                 /*
>                  * We are runtime suspended.
> @@ -452,10 +449,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                  * on top of the last known real value, as the approximated RC6
>                  * counter value.
>                  */
> -               if (!locked)
> -                       spin_lock_irqsave(&i915->pmu.lock, flags);
> -
> -               spin_lock_irqsave(&kdev->power.lock, flags2);
> +               spin_lock_irqsave(&i915->pmu.lock, flags);
> +               spin_lock(&kdev->power.lock);
>  
>                 if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
>                         i915->pmu.suspended_jiffies_last =
> @@ -465,14 +460,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                       i915->pmu.suspended_jiffies_last;
>                 val += jiffies - kdev->power.accounting_timestamp;
>  
> -               spin_unlock_irqrestore(&kdev->power.lock, flags2);
> +               spin_unlock(&kdev->power.lock);
>  
>                 val = jiffies_to_nsecs(val);
>                 val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
>                 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
>  
> -               if (!locked)
> -                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +               spin_unlock_irqrestore(&i915->pmu.lock, flags);
>         }
>  
>         return val;
> @@ -481,7 +475,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>  #endif
>  }
>  
> -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
> +static u64 __i915_pmu_event_read(struct perf_event *event)
>  {
>         struct drm_i915_private *i915 =
>                 container_of(event->pmu, typeof(*i915), pmu.base);
> @@ -519,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
>                         val = count_interrupts(i915);
>                         break;
>                 case I915_PMU_RC6_RESIDENCY:
> -                       val = get_rc6(i915, locked);
> +                       val = get_rc6(i915);
>                         break;
>                 }
>         }
> @@ -534,7 +528,7 @@ static void i915_pmu_event_read(struct perf_event *event)
>  
>  again:
>         prev = local64_read(&hwc->prev_count);
> -       new = __i915_pmu_event_read(event, false);
> +       new = __i915_pmu_event_read(event);
>  
>         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
>                 goto again;
> @@ -584,14 +578,14 @@ static void i915_pmu_enable(struct perf_event *event)
>                 engine->pmu.enable_count[sample]++;
>         }
>  
> +       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +
>         /*
>          * Store the current counter value so we can report the correct delta
>          * for all listeners. Even when the event was already enabled and has
>          * an existing non-zero value.
>          */
> -       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
> -
> -       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
>  }

Ok, the only question then is whether pmu_enable/pmu_disable is
externally serialised. Afaict, that is true through serialisation of
event->state.

Hmm, actually instead of doing local64_set() here, it would be symmetric
with i915_pmu_event_stop() if it was done in i915_pmu_event_start():

i915_pmu_event_start {
	i915_pmu_enable(event);
	/* ... */
	i915_pmu_event_read(event);
	event->hw.state = 0;
}

i915_pmu_event_stop {
	if (flags & UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = STOPPED;
}

Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>

Can you follow up with that adjustment for balance, if it suits you?
-Chris


More information about the Intel-gfx mailing list