[Intel-gfx] [PATCH 6/7] drm/i915: expose command stream timestamp frequency to userspace
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Dec 1 20:43:06 UTC 2017
On 01/12/17 20:02, Paulo Zanoni wrote:
> Em Sex, 2017-11-10 às 19:08 +0000, Lionel Landwerlin escreveu:
>> We use to have this fixed per generation, but starting with CNL
>> userspace
>> cannot tell just off the PCI ID. Let's make this information
>> available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include
>> pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> v2: Use variables for 24MHz/19.2MHz values (Ewelina)
>> Renamed function & coding style (Sagar)
>>
>> v3: Fix frequency read on Broadwell (Sagar)
>> Fix missing divide by 4 on <= gen4 (Sagar)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Tested-by: Rafael Antognolli <rafael.antognolli at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_debugfs.c | 2 +
>> drivers/gpu/drm/i915/i915_drv.c | 3 +
>> drivers/gpu/drm/i915/i915_drv.h | 2 +
>> drivers/gpu/drm/i915/i915_reg.h | 21 ++++++
>> drivers/gpu/drm/i915/intel_device_info.c | 107
>> +++++++++++++++++++++++++++++++
>> include/uapi/drm/i915_drm.h | 6 ++
>> 6 files changed, 141 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 533ba096b9a6..57485f29d7c9 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3245,6 +3245,8 @@ static int i915_engine_info(struct seq_file *m,
>> void *unused)
>> yesno(dev_priv->gt.awake));
>> seq_printf(m, "Global active requests: %d\n",
>> dev_priv->gt.active_requests);
>> + seq_printf(m, "CS timestamp frequency: %llu Hz\n",
>> + dev_priv->info.cs_timestamp_frequency);
>>
>> p = drm_seq_file_printer(m);
>> for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index d97fe9c9439a..dbd04d5f75ee 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -419,6 +419,9 @@ static int i915_getparam(struct drm_device *dev,
>> void *data,
>> if (!value)
>> return -ENODEV;
>> break;
>> + case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> + value = INTEL_INFO(dev_priv)-
>>> cs_timestamp_frequency;
>> + break;
>> default:
>> DRM_DEBUG("Unknown parameter %d\n", param->param);
>> return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 9f26c34e0e52..ebc49ca58eb7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -885,6 +885,8 @@ struct intel_device_info {
>> /* Slice/subslice/EU info */
>> struct sseu_dev_info sseu;
>>
>> + u64 cs_timestamp_frequency;
>> +
>> struct color_luts {
>> u16 degamma_lut_size;
>> u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 7aa7dc0c336b..ec9faa11e305 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool
>> i915_mmio_reg_valid(i915_reg_t reg)
>>
>> /* RPM unit config (Gen8+) */
>> #define RPM_CONFIG0 _MMIO(0x0D00)
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 <<
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1
>> +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1
>> +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 <<
>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>> #define RPM_CONFIG1 _MMIO(0x0D04)
>> #define GEN10_GT_NOA_ENABLE (1 << 9)
>>
>> +/* GPM unit config (Gen9+) */
>> +#define CTC_MODE _MMIO(0xA26C)
>> +#define CTC_SOURCE_PARAMETER_MASK 1
>> +#define CTC_SOURCE_CRYSTAL_CLOCK 0
>> +#define CTC_SOURCE_DIVIDE_LOGIC 1
>> +#define CTC_SHIFT_PARAMETER_SHIFT 1
>> +#define CTC_SHIFT_PARAMETER_MASK (0x3 <<
>> CTC_SHIFT_PARAMETER_SHIFT)
>> +
>> /* RCP unit config (Gen8+) */
>> #define RCP_CONFIG _MMIO(0x0D08)
>>
>> @@ -8866,6 +8881,12 @@ enum skl_power_gate {
>> #define ILK_TIMESTAMP_HI _MMIO(0x70070)
>> #define IVB_TIMESTAMP_CTR _MMIO(0x44070)
>>
>> +#define GEN9_TIMESTAMP_OVERRIDE _MMIO
>> (0x44074)
>> +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0
>> +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3f
>> f
>> +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT
>> 12
>> +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK
>> (0xf << 12)
>> +
>> #define _PIPE_FRMTMSTMP_A 0x70048
>> #define PIPE_FRMTMSTMP(pipe) \
>> _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..78bf7374fbdd 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,108 @@ static void broadwell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> sseu->has_eu_pg = 0;
>> }
>>
>> +static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv)
>> +{
>> + u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE);
>> + u64 base_freq, frac_freq;
>> +
>> + base_freq = ((ts_override &
>> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
>> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIF
>> T) + 1;
>> + base_freq *= 1000000;
>> +
>> + frac_freq = ((ts_override &
>> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR
>> _MASK) >>
>> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_
>> SHIFT);
>> + if (frac_freq != 0)
>> + frac_freq = 1000000 / (frac_freq + 1);
>> +
>> + return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private
>> *dev_priv)
>> +{
>> + u64 f12_5_mhz = 12500000;
>> + u64 f19_2_mhz = 19200000;
>> + u64 f24_mhz = 24000000;
>> +
>> + if (INTEL_GEN(dev_priv) <= 4) {
>> + /* PRMs say:
>> + *
>> + * "The value in this register increments once
>> every 16
>> + * hclks." (through the “Clocking
>> Configuration”
>> + * (“CLKCFG”) MCHBAR register)
>> + */
>> + return (dev_priv->rawclk_freq * 1000) / 16;
>> + } else if (INTEL_GEN(dev_priv) <= 8) {
>> + /* PRMs say:
>> + *
>> + * "The PCU TSC counts 10ns increments; this
>> timestamp
>> + * reflects bits 38:3 of the TSC (i.e. 80ns
>> granularity,
>> + * rolling over every 1.5 hours).
>> + */
>> + return f12_5_mhz;
>> + } else if (INTEL_GEN(dev_priv) <= 9) {
>> + u32 ctc_reg = I915_READ(CTC_MODE);
>> + u64 freq = 0;
>> +
>> + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
>> CTC_SOURCE_DIVIDE_LOGIC) {
>> + freq = read_reference_ts_freq(dev_priv);
>> + } else {
>> + freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz :
>> f24_mhz;
>> +
>> + /* Now figure out how the command stream's
>> timestamp
>> + * register increments from this frequency
>> (it might
>> + * increment only every few clock cycle).
>> + */
>> + freq >>= 3 - ((ctc_reg &
>> CTC_SHIFT_PARAMETER_MASK) >>
>> + CTC_SHIFT_PARAMETER_SHIFT);
>> + }
>> +
>> + return freq;
>> + } else if (INTEL_GEN(dev_priv) <= 10) {
>> + u32 ctc_reg = I915_READ(CTC_MODE);
>> + u64 freq = 0;
>> + u32 rpm_config_reg = 0;
>> +
>> + /* First figure out the reference frequency. There
>> are 2 ways
>> + * we can compute the frequency, either through the
>> + * TIMESTAMP_OVERRIDE register or through
>> RPM_CONFIG. CTC_MODE
>> + * tells us which one we should use.
>> + */
>> + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
>> CTC_SOURCE_DIVIDE_LOGIC) {
>> + freq = read_reference_ts_freq(dev_priv);
>> + } else {
>> + u32 crystal_clock;
>> +
>> + rpm_config_reg = I915_READ(RPM_CONFIG0);
>> + crystal_clock = (rpm_config_reg &
>> + GEN9_RPM_CONFIG0_CRYSTAL_CL
>> OCK_FREQ_MASK) >>
>> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_
>> SHIFT;
>> + switch (crystal_clock) {
>> + case
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
>> + freq = f19_2_mhz;
>> + break;
>> + case
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
>> + freq = f24_mhz;
>> + break;
>> + }
>> + }
>> +
>> + /* Now figure out how the command stream's timestamp
>> register
>> + * increments from this frequency (it might
>> increment only
>> + * every few clock cycle).
>> + */
>> + freq >>= 3 - ((rpm_config_reg &
>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER
>> _MASK) >>
>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_
>> SHIFT);
> I've been trying to understand this code. What's confusing to me is
> that on gen 9 the frequency shifting only happens for the non-
> DIVIDER_LOGIC case, while here the frequency shifting (apparently)
> applies to both DIVIDER_LOGIC cases since it is outside the if
> statement.
>
> On the other hand, this code here uses "rpm_config_reg" which is 0 for
> the non-DIVIDER_LOGIC case, since we only I915_READ rpm_config_reg
> inside the "else" statement. Either this is a bug, or it's a really
> non-trivial way of writing code and we should just do "freq >>= 3" in
> the first part of the if statement and do the appropriate division in
> the else case.
Hi Paulo,
You're right, this is wrong.
The fix is here : https://patchwork.freedesktop.org/patch/188096/
>
> As a note, in our driver we generally don't zero-initialize variables
> when we don't need. I know that doing zero initialization has its
> advantages, but the great advantage of not zero initializing things is
> that it allows GCC to catch bugs such as the one that's apparently
> happening here. Another thing which we try to do is to limit variables
> to the narrowest scope where they are needed, this way they can't even
> be accessed outside their scope: doing this would also have helped
> preventing the apparent problem we have here.
>
> So: if this code is has a bug, we need to fix it (possibly by reading
> RPM_CONFIG0 outside the if statement). If this code is actually working
> as intended, we need to make it a little less contrived. I spent a long
> time staring at our spec and I'm not sure what should be done.
>
> As an additional suggestion, you could also extract the code that deals
> with the "else" statement to its own function, just like you did with
> read_reference_ts_freq().
>
> Thanks,
> Paulo
>
>> +
>> + return freq;
>> + }
>> +
>> + DRM_ERROR("Unknown gen, unable to compute command stream
>> timestamp frequency\n");
>> + return 0;
>> +}
>> +
>> /*
>> * Determine various intel_device_info fields at runtime.
>> *
>> @@ -450,6 +552,9 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>> else if (INTEL_GEN(dev_priv) >= 10)
>> gen10_sseu_info_init(dev_priv);
>>
>> + /* Initialize command stream timestamp frequency */
>> + info->cs_timestamp_frequency =
>> read_timestamp_frequency(dev_priv);
>> +
>> DRM_DEBUG_DRIVER("slice mask: %04x\n", info-
>>> sseu.slice_mask);
>> DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info-
>>> sseu.slice_mask));
>> DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +570,6 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>> info->sseu.has_subslice_pg ? "y" : "n");
>> DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>> info->sseu.has_eu_pg ? "y" : "n");
>> + DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> + info->cs_timestamp_frequency);
>> }
>> diff --git a/include/uapi/drm/i915_drm.h
>> b/include/uapi/drm/i915_drm.h
>> index 6c02ced663f8..b57985929553 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -481,6 +481,12 @@ typedef struct drm_i915_irq_wait {
>> */
>> #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>>
>> +/* Frequency of the command streamer timestamps given by the
>> *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL
>> onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
>> +
>> typedef struct drm_i915_getparam {
>> __s32 param;
>> /*
More information about the Intel-gfx
mailing list