[Intel-gfx] [PATCH v2 6/9] drm/i915: expose command stream timestamp frequency to userspace
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Nov 9 14:06:31 UTC 2017
On 09/11/17 11:58, Sagar Arun Kamble wrote:
>
>
> On 11/2/2017 9:59 PM, Lionel Landwerlin wrote:
>> We use to have this fixed per generation, but starting with CNL
>> userspace
>> cannot tell just off the PCI ID. Let's make this information
>> available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_debugfs.c | 2 +
>> drivers/gpu/drm/i915/i915_drv.c | 3 +
>> drivers/gpu/drm/i915/i915_drv.h | 2 +
>> drivers/gpu/drm/i915/i915_reg.h | 21 +++++++
>> drivers/gpu/drm/i915/intel_device_info.c | 99
>> ++++++++++++++++++++++++++++++++
>> include/uapi/drm/i915_drm.h | 6 ++
>> 6 files changed, 133 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 39883cd915db..0897fd616a1f 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m,
>> void *unused)
>> yesno(dev_priv->gt.awake));
>> seq_printf(m, "Global active requests: %d\n",
>> dev_priv->gt.active_requests);
>> + seq_printf(m, "CS timestamp frequency: %llu\n",
>> + dev_priv->info.cs_timestamp_frequency);
> should be accessed through INTEL_INFO
> How about adding "Hz" to message
Done.
>> p = drm_seq_file_printer(m);
>> for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index e7e9e061073b..fdd23e79fb46 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev,
>> void *data,
>> if (!value)
>> return -ENODEV;
>> break;
>> + case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> + value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
> losing the precision here. can we make cs_timestamp_frequency u32?
Yeah, I'm not super happy about the int* of getparam.
INT_MAX limits us to ~2GHz, which I don't think we'll ever reach.
Do you agree? Or do you think we need to handle bigger values?
>> + break;
>> default:
>> DRM_DEBUG("Unknown parameter %d\n", param->param);
>> return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 6cb7cd7f9420..4e804aaeaae1 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -886,6 +886,8 @@ struct intel_device_info {
>> /* Slice/subslice/EU info */
>> struct sseu_dev_info sseu;
>> + uint64_t cs_timestamp_frequency;
>> +
> s/uint64_t/u64 - (Chris had suggested earlier)
Done.
>> struct color_luts {
>> u16 degamma_lut_size;
>> u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index a2223f01ee2a..f392f28f2cfa 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool
>> i915_mmio_reg_valid(i915_reg_t reg)
>> /* RPM unit config (Gen8+) */
>> #define RPM_CONFIG0 _MMIO(0x0D00)
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 <<
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0
>> +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1
>> +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1
>> +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 <<
>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>> #define RPM_CONFIG1 _MMIO(0x0D04)
>> #define GEN10_GT_NOA_ENABLE (1 << 9)
>> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>> +#define GEN8_CTC_MODE _MMIO(0xA26C)
>> +#define GEN8_CTC_SOURCE_PARAMETER_MASK 1
>> +#define GEN8_CTC_SOURCE_CRYSTAL_CLOCK 0
>> +#define GEN8_CTC_SOURCE_DIVIDE_LOGIC 1
>> +#define GEN8_CTC_SHIFT_PARAMETER_SHIFT 1
>> +#define GEN8_CTC_SHIFT_PARAMETER_MASK (0x3 <<
>> GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>> /* RPC unit config (Gen8+) */
>> #define RPC_CONFIG _MMIO(0x0D08)
>> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>> #define ILK_TIMESTAMP_HI _MMIO(0x70070)
>> #define IVB_TIMESTAMP_CTR _MMIO(0x44070)
>> +#define GEN8_TIMESTAMP_OVERRIDE _MMIO(0x44074)
>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT 0
>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK 0x3ff
> US_COUNTER_DIVIDER_MASK?
Sure, I thought it was just a bit too long :)
>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12
>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf <<
>> 12)
>> +
>> #define _PIPE_FRMTMSTMP_A 0x70048
>> #define PIPE_FRMTMSTMP(pipe) \
>> _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..9b71a9b6d80e 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> sseu->has_eu_pg = 0;
>> }
>> +static u64 read_timestamp_frequency_from_divide(struct
>> drm_i915_private *dev_priv)
> Should this be named read_reference_ts_freq?
Yes, thanks!
>> +{
>> + u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>> + u64 base_freq, frac_freq;
>> +
>> + base_freq = ((ts_override &
>> GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>> + base_freq *= 1000000;
>> +
>> + frac_freq = ((ts_override &
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>> + if (frac_freq != 0)
>> + frac_freq = 1000000 / (frac_freq + 1);
> Not considering numerator?
The documentation is quite terrible, but my reading is that the
numerator doesn't apply to any current generations.
>> +
>> + return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>> +{
>> + if (INTEL_GEN(dev_priv) <= 4) {
>> + /* PRMs say:
>> + *
>> + * "The value in this register increments once every 16
>> + * hclks." ("CLKCFG" register)
>> + *
>> + * Since dev_priv->rawclk_freq stores the value in kHz divided
>> + * by 4, we just need to divide it again by 4.
>> + */
> I read this as hclk is 1/4th fsb clock and timestamp is 1/16 of hclk
> so this should be 16.
You're right, but as the comment above explains, rawclk_freq is already
hclk / 4.
Dividing by 4 again gives us hclk / 16.
>> + return (dev_priv->rawclk_freq * 1000) / 4;
>> + } else if (INTEL_GEN(dev_priv) <= 7) {
>> + /* PRMs say:
>> + *
>> + * "The PCU TSC counts 10ns increments; this timestamp
>> + * reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>> + * rolling over every 1.5 hours).
>> + */
>> + return 12500000;
>> + } else if (INTEL_GEN(dev_priv) <= 9) {
>> + u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> + u64 freq = 0;
>> +
>> + if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) ==
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>> + freq = read_timestamp_frequency_from_divide(dev_priv);
>> + else
>> + freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>> +
>> + /* Now figure out how the command stream's timestamp register
>> + * increments from this frequency (it might increment only
>> + * every few clock cycle).
>> + */
>> + freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>> + GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> Gen8 documentation is indeed fuzzy. Are we getting 12.5mhz after this
> shift as doc says it to have 80ns base.
>> +
>> + return freq;
>> + } else if (INTEL_GEN(dev_priv) <= 10) {
>> + u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> + u64 freq = 0;
>> + u32 rpm_config_reg = 0;
>> +
>> + /* First figure out the reference frequency. There are 2 ways
>> + * we can compute the frequency, either through the
>> + * TIMESTAMP_OVERRIDE register or through CTC_MODE &
> Remove CTC_MODE as it does not itself determine the frequency.
Done, thanks.
>> + * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>> + * one we should use.
>> + */
>> + if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) ==
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>> + freq = read_timestamp_frequency_from_divide(dev_priv);
>> + } else {
>> + u32 crystal_clock;
>> +
>> + rpm_config_reg = I915_READ(RPM_CONFIG0);
>> + crystal_clock = (rpm_config_reg &
>> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>> + freq = crystal_clock ==
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>> + 19200000 : 24000000;
> switch case would be better i guess.
Done.
>> + }
>> +
>> + /* Now figure out how the command stream's timestamp register
>> + * increments from this frequency (it might increment only
>> + * every few clock cycle).
>> + */
>> + freq >>= 3 - ((rpm_config_reg &
>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> + return freq;
>> + }
>> +
>> + DRM_ERROR("Unknown gen, unable to compute command stream
>> timestamp frequency\n");
>> + return 0;
>> +}
>> +
>> /*
>> * Determine various intel_device_info fields at runtime.
>> *
>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>> else if (INTEL_GEN(dev_priv) >= 10)
>> gen10_sseu_info_init(dev_priv);
>> + /* Initialize command stream timestamp frequency */
>> + info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>> +
>> DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>> DRM_DEBUG_DRIVER("slice total: %u\n",
>> hweight8(info->sseu.slice_mask));
>> DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>> info->sseu.has_subslice_pg ? "y" : "n");
>> DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>> info->sseu.has_eu_pg ? "y" : "n");
>> + DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> + info->cs_timestamp_frequency);
>> }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 125bde7d9504..c3ff0d4947af 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>> */
>> #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
>> +/* Frequency of the command streamer timestamps given by the
>> *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL
>> onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 50
>> +
>> typedef struct drm_i915_getparam {
>> __s32 param;
>> /*
>
>
More information about the Intel-gfx
mailing list