[Intel-gfx] [PATCH v2 6/9] drm/i915: expose command stream timestamp frequency to userspace

Sagar Arun Kamble sagar.a.kamble at intel.com
Thu Nov 9 16:37:07 UTC 2017



On 11/9/2017 7:36 PM, Lionel Landwerlin wrote:
> On 09/11/17 11:58, Sagar Arun Kamble wrote:
>>
>>
>> On 11/2/2017 9:59 PM, Lionel Landwerlin wrote:
>>> We used to have this fixed per generation, but starting with CNL 
>>> userspace
>>> cannot tell just off the PCI ID. Let's make this information 
>>> available. This
>>> is particularly useful for performance monitoring where much of the
>>> normalization work is done using those timestamps (this includes 
>>> pipeline
>>> statistics in both GL & Vulkan as well as OA reports).
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>>>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>>>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>>>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>>>   drivers/gpu/drm/i915/intel_device_info.c | 99 
>>> ++++++++++++++++++++++++++++++++
>>>   include/uapi/drm/i915_drm.h              |  6 ++
>>>   6 files changed, 133 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 39883cd915db..0897fd616a1f 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file 
>>> *m, void *unused)
>>>              yesno(dev_priv->gt.awake));
>>>       seq_printf(m, "Global active requests: %d\n",
>>>              dev_priv->gt.active_requests);
>>> +    seq_printf(m, "CS timestamp frequency: %llu\n",
>>> +           dev_priv->info.cs_timestamp_frequency);
>> This should be accessed through INTEL_INFO.
>> How about adding "Hz" to the message?
>
> Done.
>
>>>         p = drm_seq_file_printer(m);
>>>       for_each_engine(engine, dev_priv, id)
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>>> b/drivers/gpu/drm/i915/i915_drv.c
>>> index e7e9e061073b..fdd23e79fb46 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, 
>>> void *data,
>>>           if (!value)
>>>               return -ENODEV;
>>>           break;
>>> +    case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>>> +        value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
>> We are losing precision here. Can we make cs_timestamp_frequency u32?
>
> Yeah, I'm not super happy about the int* of getparam.
> INT_MAX limits us to ~2GHz, which I don't think we'll ever reach.
> Do you agree? Do you think we need to handle bigger values?
>
Yes. Agree on making this int.
>
>>> +        break;
>>>       default:
>>>           DRM_DEBUG("Unknown parameter %d\n", param->param);
>>>           return -EINVAL;
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 6cb7cd7f9420..4e804aaeaae1 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -886,6 +886,8 @@ struct intel_device_info {
>>>       /* Slice/subslice/EU info */
>>>       struct sseu_dev_info sseu;
>>>   +    uint64_t cs_timestamp_frequency;
>>> +
>> s/uint64_t/u64 - (Chris had suggested earlier)
>
> Done.
>
>>>       struct color_luts {
>>>           u16 degamma_lut_size;
>>>           u16 gamma_lut_size;
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index a2223f01ee2a..f392f28f2cfa 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -1119,9 +1119,24 @@ static inline bool 
>>> i915_mmio_reg_valid(i915_reg_t reg)
>>>     /* RPM unit config (Gen8+) */
>>>   #define RPM_CONFIG0        _MMIO(0x0D00)
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT    3
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK    (1 << 
>>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ    0
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ    1
>>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT    1
>>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>>> +
>>>   #define RPM_CONFIG1        _MMIO(0x0D04)
>>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>>   +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>>> +#define GEN8_CTC_MODE            _MMIO(0xA26C)
>>> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
>>> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK    0
>>> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC    1
>>> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT    1
>>> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>>> GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>>> +
>>>   /* RPC unit config (Gen8+) */
>>>   #define RPC_CONFIG        _MMIO(0x0D08)
>>>   @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>>>   #define ILK_TIMESTAMP_HI    _MMIO(0x70070)
>>>   #define IVB_TIMESTAMP_CTR    _MMIO(0x44070)
>>>   +#define GEN8_TIMESTAMP_OVERRIDE _MMIO(0x44074)
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT        0
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK        0x3ff
>> US_COUNTER_DIVIDER_MASK?
>
> Sure, I thought it was just a bit too long :)
>
>>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT    12
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf 
>>> << 12)
>>> +
>>>   #define _PIPE_FRMTMSTMP_A        0x70048
>>>   #define PIPE_FRMTMSTMP(pipe)        \
>>>               _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>>> b/drivers/gpu/drm/i915/intel_device_info.c
>>> index db03d179fc85..9b71a9b6d80e 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>       sseu->has_eu_pg = 0;
>>>   }
>>>   +static u64 read_timestamp_frequency_from_divide(struct 
>>> drm_i915_private *dev_priv)
>> Should this be named read_reference_ts_freq?
>
> Yes, thanks!
>
>>> +{
>>> +    u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>>> +    u64 base_freq, frac_freq;
>>> +
>>> +    base_freq = ((ts_override & 
>>> GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>>> +             GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>>> +    base_freq *= 1000000;
>>> +
>>> +    frac_freq = ((ts_override &
>>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>>> +    if (frac_freq != 0)
>>> +        frac_freq = 1000000 / (frac_freq + 1);
>> Not considering numerator?
>
> The documentation is quite terrible, but my reading is that the 
> numerator doesn't apply to any current generations.
>
Understood now. I think we should also check whether the override is set 
before considering the denominator.
>>> +
>>> +    return base_freq + frac_freq;
>>> +}
>>> +
>>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>>> +{
>>> +    if (INTEL_GEN(dev_priv) <= 4) {
>>> +        /* PRMs say:
>>> +         *
>>> +         *     "The value in this register increments once every 16
>>> +         *      hclks." ("CLKCFG" register)
>>> +         *
>>> +         * Since dev_priv->rawclk_freq stores the value in kHz divided
>>> +         * by 4, we just need to divide it again by 4.
>>> +         */
>> I read this as: hclk is 1/4 of the fsb clock and the timestamp is 1/16 of hclk, 
>> so the divisor here should be 16.
>
> You're right, but as the comment above explains, rawclk_freq is 
> already hclk / 4.
> Another / 4 gives us / 16.
1. hclk = 1/4 * fsb_clk
2. ts_clk = 1/16 * hclk
=> ts_clk = 1/64 * fsb_clk
So this should be "(dev_priv->rawclk_freq * 1000) / 16", right?
>
>>> +        return (dev_priv->rawclk_freq * 1000) / 4;
>>> +    } else if (INTEL_GEN(dev_priv) <= 7) {
>>> +        /* PRMs say:
>>> +         *
>>> +         *     "The PCU TSC counts 10ns increments; this timestamp
>>> +         *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>>> +         *      rolling over every 1.5 hours).
>>> +         */
>>> +        return 12500000;
>>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>>> +        u64 freq = 0;
>>> +
>>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>>> +        else
>>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>>> +
>>> +        /* Now figure out how the command stream's timestamp register
>>> +         * increments from this frequency (it might increment only
>>> +         * every few clock cycle).
>>> +         */
>>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
>> Gen8 documentation is indeed fuzzy. Are we getting 12.5 MHz after this 
>> shift, as the doc says it has an 80ns base?
>>> +
>>> +        return freq;
>>> +    } else if (INTEL_GEN(dev_priv) <= 10) {
>>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>>> +        u64 freq = 0;
>>> +        u32 rpm_config_reg = 0;
>>> +
>>> +        /* First figure out the reference frequency. There are 2 ways
>>> +         * we can compute the frequency, either through the
>>> +         * TIMESTAMP_OVERRIDE register or through CTC_MODE &
>> Remove CTC_MODE as it does not itself determine the frequency.
>
> Done, thanks.
>
>>> +         * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>>> +         * one we should use.
>>> +         */
>>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>>> GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>>> +        } else {
>>> +            u32 crystal_clock;
>>> +
>>> +            rpm_config_reg = I915_READ(RPM_CONFIG0);
>>> +            crystal_clock = (rpm_config_reg &
>>> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>>> +                GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>>> +            freq = crystal_clock == 
>>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>>> +                19200000 : 24000000;
>> A switch statement would be better, I guess.
>
> Done.
>
>>> +        }
>>> +
>>> +        /* Now figure out how the command stream's timestamp register
>>> +         * increments from this frequency (it might increment only
>>> +         * every few clock cycle).
>>> +         */
>>> +        freq >>= 3 - ((rpm_config_reg &
>>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>>> +
>>> +        return freq;
>>> +    }
>>> +
>>> +    DRM_ERROR("Unknown gen, unable to compute command stream 
>>> timestamp frequency\n");
>>> +    return 0;
>>> +}
>>> +
>>>   /*
>>>    * Determine various intel_device_info fields at runtime.
>>>    *
>>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>       else if (INTEL_GEN(dev_priv) >= 10)
>>>           gen10_sseu_info_init(dev_priv);
>>>   +    /* Initialize command stream timestamp frequency */
>>> +    info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>>> +
>>>       DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>>> hweight8(info->sseu.slice_mask));
>>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>                info->sseu.has_subslice_pg ? "y" : "n");
>>>       DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>>                info->sseu.has_eu_pg ? "y" : "n");
>>> +    DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>>> +             info->cs_timestamp_frequency);
>>>   }
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 125bde7d9504..c3ff0d4947af 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>>>    */
>>>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>>>   +/* Frequency of the command streamer timestamps given by the 
>>> *_TIMESTAMP
>>> + * registers. This used to be fixed per platform but from CNL 
>>> onwards, this
>>> + * might vary depending on the parts.
>>> + */
>>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
>>> +
>>>   typedef struct drm_i915_getparam {
>>>       __s32 param;
>>>       /*
>>
>>
>
Thanks
Sagar


More information about the Intel-gfx mailing list