[Intel-gfx] [RFC 1/4] drm/i915/perf: Add support to correlate GPU timestamp with system time

Sagar Arun Kamble sagar.a.kamble at intel.com
Wed Dec 6 08:17:12 UTC 2017



On 12/5/2017 7:28 PM, Lionel Landwerlin wrote:
> On 15/11/17 12:25, Chris Wilson wrote:
>> Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
>>>   #include <drm/drmP.h>
>>>   #include <drm/intel-gtt.h>
>>> @@ -2149,6 +2150,14 @@ struct i915_perf_stream {
>>>           * @oa_config: The OA configuration used by the stream.
>>>           */
>>>          struct i915_oa_config *oa_config;
>>> +
>>> +       /**
>>> +        * System time correlation variables.
>>> +        */
>>> +       struct cyclecounter cc;
>>> +       spinlock_t systime_lock;
>>> +       struct timespec64 start_systime;
>>> +       struct timecounter tc;
>> This pattern is repeated a lot by struct timecounter users. (I'm still
>> trying to understand why the common case is not catered for by a
>> convenience timecounter api.)
>>
>>>   };
>>>     /**
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 00be015..72ddc34 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -192,6 +192,7 @@
>>>    */
>>>     #include <linux/anon_inodes.h>
>>> +#include <linux/clocksource.h>
>>>   #include <linux/sizes.h>
>>>   #include <linux/uuid.h>
>>>   @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct 
>>> file *file, poll_table *wait)
>>>   }
>>>     /**
>>> + * i915_cyclecounter_read - read raw cycle/timestamp counter
>>> + * @cc: cyclecounter structure
>>> + */
>>> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
>>> +{
>>> +       struct i915_perf_stream *stream = container_of(cc, 
>>> typeof(*stream), cc);
>>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>>> +       u64 ts_count;
>>> +
>>> +       intel_runtime_pm_get(dev_priv);
>>> +       ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
>>> +                                   GEN7_TIMESTAMP_UDW);
>>> +       intel_runtime_pm_put(dev_priv);
>>> +
>>> +       return ts_count;
>>> +}
>>> +
>>> +static void i915_perf_init_cyclecounter(struct i915_perf_stream 
>>> *stream)
>>> +{
>>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>>> +       int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
>>> +       struct cyclecounter *cc = &stream->cc;
>>> +       u32 maxsec;
>>> +
>>> +       cc->read = i915_cyclecounter_read;
>>> +       cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
>>> +       maxsec = cc->mask / cs_ts_freq;
>>> +
>>> +       clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
>>> +                              NSEC_PER_SEC, maxsec);
>>> +}
>>> +
>>> +static void i915_perf_init_timecounter(struct i915_perf_stream 
>>> *stream)
>>> +{
>>> +#define SYSTIME_START_OFFSET   350000 /* Counter read takes about 
>>> 350us */
>>> +       unsigned long flags;
>>> +       u64 ns;
>>> +
>>> +       i915_perf_init_cyclecounter(stream);
>>> +       spin_lock_init(&stream->systime_lock);
>>> +
>>> +       getnstimeofday64(&stream->start_systime);
>>> +       ns = timespec64_to_ns(&stream->start_systime) + 
>>> SYSTIME_START_OFFSET;
>> Use ktime directly. Or else Arnd will be back with a patch to fix it.
>> (All non-ktime interfaces are effectively deprecated; obsolete for
>> drivers.)
>>
>>> + spin_lock_irqsave(&stream->systime_lock, flags);
>>> +       timecounter_init(&stream->tc, &stream->cc, ns);
>>> +       spin_unlock_irqrestore(&stream->systime_lock, flags);
>>> +}
>>> +
>>> +/**
>>>    * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
>>>    * @stream: A disabled i915 perf stream
>>>    *
>>> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct 
>>> i915_perf_stream *stream)
>>>          /* Allow stream->ops->enable() to refer to this */
>>>          stream->enabled = true;
>>>   +       i915_perf_init_timecounter(stream);
>>> +
>>>          if (stream->ops->enable)
>>>                  stream->ops->enable(stream);
>>>   }
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index cfdf4f8..e7e6966 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -8882,6 +8882,12 @@ enum skl_power_gate {
>>>     /* Gen4+ Timestamp and Pipe Frame time stamp registers */
>>>   #define GEN4_TIMESTAMP         _MMIO(0x2358)
>>> +#define GEN7_TIMESTAMP_UDW     _MMIO(0x235C)
>>> +#define PRE_GEN7_TIMESTAMP_WIDTH       32
>>> +#define GEN7_TIMESTAMP_WIDTH           36
>>> +#define CS_TIMESTAMP_WIDTH(dev_priv) \
>>> +       (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
>>> +                                  GEN7_TIMESTAMP_WIDTH)
>> s/PRE_GEN7/GEN4/ would be consistent.
>> If you really want to add support for earlier, I9XX_.
>
> Why not use the RING_TIMESTAMP() RING_TIMESTAMP_UDW() ?
> There's already used for the reg_read ioctl.

Yes. We can use these.

>
>>
>> Ok. I can accept the justification, and we are not the only ones who do
>> the cyclecounter -> timecounter correction like this.
>> -Chris
>>
>



More information about the Intel-gfx mailing list