[Intel-gfx] [PATCH 6/7] drm/i915: expose command stream timestamp frequency to userspace

Paulo Zanoni paulo.r.zanoni at intel.com
Fri Dec 1 20:02:22 UTC 2017


Em Sex, 2017-11-10 às 19:08 +0000, Lionel Landwerlin escreveu:
> We use to have this fixed per generation, but starting with CNL
> userspace
> cannot tell just off the PCI ID. Let's make this information
> available. This
> is particularly useful for performance monitoring where much of the
> normalization work is done using those timestamps (this include
> pipeline
> statistics in both GL & Vulkan as well as OA reports).
> 
> v2: Use variables for 24MHz/19.2MHz values (Ewelina)
>     Renamed function & coding style (Sagar)
> 
> v3: Fix frequency read on Broadwell (Sagar)
>     Fix missing divide by 4 on <= gen4 (Sagar)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Tested-by: Rafael Antognolli <rafael.antognolli at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |   2 +
>  drivers/gpu/drm/i915/i915_drv.c          |   3 +
>  drivers/gpu/drm/i915/i915_drv.h          |   2 +
>  drivers/gpu/drm/i915/i915_reg.h          |  21 ++++++
>  drivers/gpu/drm/i915/intel_device_info.c | 107
> +++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h              |   6 ++
>  6 files changed, 141 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 533ba096b9a6..57485f29d7c9 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3245,6 +3245,8 @@ static int i915_engine_info(struct seq_file *m,
> void *unused)
>  		   yesno(dev_priv->gt.awake));
>  	seq_printf(m, "Global active requests: %d\n",
>  		   dev_priv->gt.active_requests);
> +	seq_printf(m, "CS timestamp frequency: %llu Hz\n",
> +		   dev_priv->info.cs_timestamp_frequency);
>  
>  	p = drm_seq_file_printer(m);
>  	for_each_engine(engine, dev_priv, id)
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> index d97fe9c9439a..dbd04d5f75ee 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -419,6 +419,9 @@ static int i915_getparam(struct drm_device *dev,
> void *data,
>  		if (!value)
>  			return -ENODEV;
>  		break;
> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
> +		value = INTEL_INFO(dev_priv)-
> >cs_timestamp_frequency;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 9f26c34e0e52..ebc49ca58eb7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -885,6 +885,8 @@ struct intel_device_info {
>  	/* Slice/subslice/EU info */
>  	struct sseu_dev_info sseu;
>  
> +	u64 cs_timestamp_frequency;
> +
>  	struct color_luts {
>  		u16 degamma_lut_size;
>  		u16 gamma_lut_size;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index 7aa7dc0c336b..ec9faa11e305 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1119,9 +1119,24 @@ static inline bool
> i915_mmio_reg_valid(i915_reg_t reg)
>  
>  /* RPM unit config (Gen8+) */
>  #define RPM_CONFIG0	    _MMIO(0x0D00)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 <<
> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 <<
> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
> +
>  #define RPM_CONFIG1	    _MMIO(0x0D04)
>  #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>  
> +/* GPM unit config (Gen9+) */
> +#define CTC_MODE			_MMIO(0xA26C)
> +#define  CTC_SOURCE_PARAMETER_MASK 1
> +#define  CTC_SOURCE_CRYSTAL_CLOCK	0
> +#define  CTC_SOURCE_DIVIDE_LOGIC	1
> +#define  CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  CTC_SHIFT_PARAMETER_MASK	(0x3 <<
> CTC_SHIFT_PARAMETER_SHIFT)
> +
>  /* RCP unit config (Gen8+) */
>  #define RCP_CONFIG	    _MMIO(0x0D08)
>  
> @@ -8866,6 +8881,12 @@ enum skl_power_gate {
>  #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>  #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>  
> +#define GEN9_TIMESTAMP_OVERRIDE				_MMIO
> (0x44074)
> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT	0
> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK	0x3f
> f
> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	
> 12
> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	
> (0xf << 12)
> +
>  #define _PIPE_FRMTMSTMP_A		0x70048
>  #define PIPE_FRMTMSTMP(pipe)		\
>  			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c
> b/drivers/gpu/drm/i915/intel_device_info.c
> index db03d179fc85..78bf7374fbdd 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -329,6 +329,108 @@ static void broadwell_sseu_info_init(struct
> drm_i915_private *dev_priv)
>  	sseu->has_eu_pg = 0;
>  }
>  
> +static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE);
> +	u64 base_freq, frac_freq;
> +
> +	base_freq = ((ts_override &
> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
> +		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIF
> T) + 1;
> +	base_freq *= 1000000;
> +
> +	frac_freq = ((ts_override &
> +		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR
> _MASK) >>
> +		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_
> SHIFT);
> +	if (frac_freq != 0)
> +		frac_freq = 1000000 / (frac_freq + 1);
> +
> +	return base_freq + frac_freq;
> +}
> +
> +static u64 read_timestamp_frequency(struct drm_i915_private
> *dev_priv)
> +{
> +	u64 f12_5_mhz = 12500000;
> +	u64 f19_2_mhz = 19200000;
> +	u64 f24_mhz = 24000000;
> +
> +	if (INTEL_GEN(dev_priv) <= 4) {
> +		/* PRMs say:
> +		 *
> +		 *     "The value in this register increments once
> every 16
> +		 *      hclks." (through the “Clocking
> Configuration”
> +		 *      (“CLKCFG”) MCHBAR register)
> +		 */
> +		return (dev_priv->rawclk_freq * 1000) / 16;
> +	} else if (INTEL_GEN(dev_priv) <= 8) {
> +		/* PRMs say:
> +		 *
> +		 *     "The PCU TSC counts 10ns increments; this
> timestamp
> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns
> granularity,
> +		 *      rolling over every 1.5 hours).
> +		 */
> +		return f12_5_mhz;
> +	} else if (INTEL_GEN(dev_priv) <= 9) {
> +		u32 ctc_reg = I915_READ(CTC_MODE);
> +		u64 freq = 0;
> +
> +		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
> CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_reference_ts_freq(dev_priv);
> +		} else {
> +			freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz :
> f24_mhz;
> +
> +			/* Now figure out how the command stream's
> timestamp
> +			 * register increments from this frequency
> (it might
> +			 * increment only every few clock cycle).
> +			 */
> +			freq >>= 3 - ((ctc_reg &
> CTC_SHIFT_PARAMETER_MASK) >>
> +				      CTC_SHIFT_PARAMETER_SHIFT);
> +		}
> +
> +		return freq;
> +	} else if (INTEL_GEN(dev_priv) <= 10) {
> +		u32 ctc_reg = I915_READ(CTC_MODE);
> +		u64 freq = 0;
> +		u32 rpm_config_reg = 0;
> +
> +		/* First figure out the reference frequency. There
> are 2 ways
> +		 * we can compute the frequency, either through the
> +		 * TIMESTAMP_OVERRIDE register or through
> RPM_CONFIG. CTC_MODE
> +		 * tells us which one we should use.
> +		 */
> +		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
> CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_reference_ts_freq(dev_priv);
> +		} else {
> +			u32 crystal_clock;
> +
> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
> +			crystal_clock = (rpm_config_reg &
> +					 GEN9_RPM_CONFIG0_CRYSTAL_CL
> OCK_FREQ_MASK) >>
> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_
> SHIFT;
> +			switch (crystal_clock) {
> +			case
> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
> +				freq = f19_2_mhz;
> +				break;
> +			case
> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
> +				freq = f24_mhz;
> +				break;
> +			}
> +		}
> +
> +		/* Now figure out how the command stream's timestamp
> register
> +		 * increments from this frequency (it might
> increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((rpm_config_reg &
> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER
> _MASK) >>
> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_
> SHIFT);

I've been trying to understand this code. What's confusing to me is
that on gen 9 the frequency shifting only happens for the non-
DIVIDER_LOGIC case, while here the frequency shifting (apparently)
applies to both DIVIDER_LOGIC cases since it is outside the if
statement.

On the other hand, this code here uses "rpm_config_reg" which is 0 for
the non-DIVIDER_LOGIC case, since we only I915_READ rpm_config_reg
inside the "else" statement. Either this is a bug, or it's a really
non-trivial way of writing code and we should just do "freq >>= 3" in
the first part of the if statement and do the appropriate division in
the else case.

As a note, in our driver we generally don't zero-initialize variables
when we don't need. I know that doing zero initialization has its
advantages, but the great advantage of not zero initializing things is
that it allows GCC to catch bugs such as the one that's apparently
happening here. Another thing which we try to do is to limit variables
to the narrowest scope where they are needed, this way they can't even
be accessed outside their scope: doing this would also have helped
preventing the apparent problem we have here.

So: if this code is has a bug, we need to fix it (possibly by reading
RPM_CONFIG0 outside the if statement). If this code is actually working
as intended, we need to make it a little less contrived. I spent a long
time staring at our spec and I'm not sure what should be done.

As an additional suggestion, you could also extract the code that deals
with the "else" statement to its own function, just like you did with
read_reference_ts_freq().

Thanks,
Paulo

> +
> +		return freq;
> +	}
> +
> +	DRM_ERROR("Unknown gen, unable to compute command stream
> timestamp frequency\n");
> +	return 0;
> +}
> +
>  /*
>   * Determine various intel_device_info fields at runtime.
>   *
> @@ -450,6 +552,9 @@ void intel_device_info_runtime_init(struct
> drm_i915_private *dev_priv)
>  	else if (INTEL_GEN(dev_priv) >= 10)
>  		gen10_sseu_info_init(dev_priv);
>  
> +	/* Initialize command stream timestamp frequency */
> +	info->cs_timestamp_frequency =
> read_timestamp_frequency(dev_priv);
> +
>  	DRM_DEBUG_DRIVER("slice mask: %04x\n", info-
> >sseu.slice_mask);
>  	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info-
> >sseu.slice_mask));
>  	DRM_DEBUG_DRIVER("subslice total: %u\n",
> @@ -465,4 +570,6 @@ void intel_device_info_runtime_init(struct
> drm_i915_private *dev_priv)
>  			 info->sseu.has_subslice_pg ? "y" : "n");
>  	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>  			 info->sseu.has_eu_pg ? "y" : "n");
> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
> +			 info->cs_timestamp_frequency);
>  }
> diff --git a/include/uapi/drm/i915_drm.h
> b/include/uapi/drm/i915_drm.h
> index 6c02ced663f8..b57985929553 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -481,6 +481,12 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>  
> +/* Frequency of the command streamer timestamps given by the
> *_TIMESTAMP
> + * registers. This used to be fixed per platform but from CNL
> onwards, this
> + * might vary depending on the parts.
> + */
> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
> +
>  typedef struct drm_i915_getparam {
>  	__s32 param;
>  	/*


More information about the Intel-gfx mailing list