[Intel-gfx] [PATCH 6/7] drm/i915: expose command stream timestamp frequency to userspace

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Dec 1 20:43:06 UTC 2017


On 01/12/17 20:02, Paulo Zanoni wrote:
> Em Sex, 2017-11-10 às 19:08 +0000, Lionel Landwerlin escreveu:
>> We use to have this fixed per generation, but starting with CNL
>> userspace
>> cannot tell just off the PCI ID. Let's make this information
>> available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include
>> pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> v2: Use variables for 24MHz/19.2MHz values (Ewelina)
>>      Renamed function & coding style (Sagar)
>>
>> v3: Fix frequency read on Broadwell (Sagar)
>>      Fix missing divide by 4 on <= gen4 (Sagar)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Tested-by: Rafael Antognolli <rafael.antognolli at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |   2 +
>>   drivers/gpu/drm/i915/i915_drv.c          |   3 +
>>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>>   drivers/gpu/drm/i915/i915_reg.h          |  21 ++++++
>>   drivers/gpu/drm/i915/intel_device_info.c | 107
>> +++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h              |   6 ++
>>   6 files changed, 141 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 533ba096b9a6..57485f29d7c9 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3245,6 +3245,8 @@ static int i915_engine_info(struct seq_file *m,
>> void *unused)
>>   		   yesno(dev_priv->gt.awake));
>>   	seq_printf(m, "Global active requests: %d\n",
>>   		   dev_priv->gt.active_requests);
>> +	seq_printf(m, "CS timestamp frequency: %llu Hz\n",
>> +		   dev_priv->info.cs_timestamp_frequency);
>>   
>>   	p = drm_seq_file_printer(m);
>>   	for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index d97fe9c9439a..dbd04d5f75ee 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -419,6 +419,9 @@ static int i915_getparam(struct drm_device *dev,
>> void *data,
>>   		if (!value)
>>   			return -ENODEV;
>>   		break;
>> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> +		value = INTEL_INFO(dev_priv)-
>>> cs_timestamp_frequency;
>> +		break;
>>   	default:
>>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>>   		return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 9f26c34e0e52..ebc49ca58eb7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -885,6 +885,8 @@ struct intel_device_info {
>>   	/* Slice/subslice/EU info */
>>   	struct sseu_dev_info sseu;
>>   
>> +	u64 cs_timestamp_frequency;
>> +
>>   	struct color_luts {
>>   		u16 degamma_lut_size;
>>   		u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 7aa7dc0c336b..ec9faa11e305 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool
>> i915_mmio_reg_valid(i915_reg_t reg)
>>   
>>   /* RPM unit config (Gen8+) */
>>   #define RPM_CONFIG0	    _MMIO(0x0D00)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 <<
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 <<
>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   #define RPM_CONFIG1	    _MMIO(0x0D04)
>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>   
>> +/* GPM unit config (Gen9+) */
>> +#define CTC_MODE			_MMIO(0xA26C)
>> +#define  CTC_SOURCE_PARAMETER_MASK 1
>> +#define  CTC_SOURCE_CRYSTAL_CLOCK	0
>> +#define  CTC_SOURCE_DIVIDE_LOGIC	1
>> +#define  CTC_SHIFT_PARAMETER_SHIFT	1
>> +#define  CTC_SHIFT_PARAMETER_MASK	(0x3 <<
>> CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   /* RCP unit config (Gen8+) */
>>   #define RCP_CONFIG	    _MMIO(0x0D08)
>>   
>> @@ -8866,6 +8881,12 @@ enum skl_power_gate {
>>   #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>>   #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>>   
>> +#define GEN9_TIMESTAMP_OVERRIDE				_MMIO
>> (0x44074)
>> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT	0
>> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK	0x3f
>> f
>> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	
>> 12
>> +#define  GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	
>> (0xf << 12)
>> +
>>   #define _PIPE_FRMTMSTMP_A		0x70048
>>   #define PIPE_FRMTMSTMP(pipe)		\
>>   			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..78bf7374fbdd 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,108 @@ static void broadwell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>>   	sseu->has_eu_pg = 0;
>>   }
>>   
>> +static u64 read_reference_ts_freq(struct drm_i915_private *dev_priv)
>> +{
>> +	u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE);
>> +	u64 base_freq, frac_freq;
>> +
>> +	base_freq = ((ts_override &
>> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
>> +		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIF
>> T) + 1;
>> +	base_freq *= 1000000;
>> +
>> +	frac_freq = ((ts_override &
>> +		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR
>> _MASK) >>
>> +		     GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_
>> SHIFT);
>> +	if (frac_freq != 0)
>> +		frac_freq = 1000000 / (frac_freq + 1);
>> +
>> +	return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private
>> *dev_priv)
>> +{
>> +	u64 f12_5_mhz = 12500000;
>> +	u64 f19_2_mhz = 19200000;
>> +	u64 f24_mhz = 24000000;
>> +
>> +	if (INTEL_GEN(dev_priv) <= 4) {
>> +		/* PRMs say:
>> +		 *
>> +		 *     "The value in this register increments once
>> every 16
>> +		 *      hclks." (through the “Clocking
>> Configuration”
>> +		 *      (“CLKCFG”) MCHBAR register)
>> +		 */
>> +		return (dev_priv->rawclk_freq * 1000) / 16;
>> +	} else if (INTEL_GEN(dev_priv) <= 8) {
>> +		/* PRMs say:
>> +		 *
>> +		 *     "The PCU TSC counts 10ns increments; this
>> timestamp
>> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns
>> granularity,
>> +		 *      rolling over every 1.5 hours).
>> +		 */
>> +		return f12_5_mhz;
>> +	} else if (INTEL_GEN(dev_priv) <= 9) {
>> +		u32 ctc_reg = I915_READ(CTC_MODE);
>> +		u64 freq = 0;
>> +
>> +		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
>> CTC_SOURCE_DIVIDE_LOGIC) {
>> +			freq = read_reference_ts_freq(dev_priv);
>> +		} else {
>> +			freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz :
>> f24_mhz;
>> +
>> +			/* Now figure out how the command stream's
>> timestamp
>> +			 * register increments from this frequency
>> (it might
>> +			 * increment only every few clock cycle).
>> +			 */
>> +			freq >>= 3 - ((ctc_reg &
>> CTC_SHIFT_PARAMETER_MASK) >>
>> +				      CTC_SHIFT_PARAMETER_SHIFT);
>> +		}
>> +
>> +		return freq;
>> +	} else if (INTEL_GEN(dev_priv) <= 10) {
>> +		u32 ctc_reg = I915_READ(CTC_MODE);
>> +		u64 freq = 0;
>> +		u32 rpm_config_reg = 0;
>> +
>> +		/* First figure out the reference frequency. There
>> are 2 ways
>> +		 * we can compute the frequency, either through the
>> +		 * TIMESTAMP_OVERRIDE register or through
>> RPM_CONFIG. CTC_MODE
>> +		 * tells us which one we should use.
>> +		 */
>> +		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) ==
>> CTC_SOURCE_DIVIDE_LOGIC) {
>> +			freq = read_reference_ts_freq(dev_priv);
>> +		} else {
>> +			u32 crystal_clock;
>> +
>> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
>> +			crystal_clock = (rpm_config_reg &
>> +					 GEN9_RPM_CONFIG0_CRYSTAL_CL
>> OCK_FREQ_MASK) >>
>> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_
>> SHIFT;
>> +			switch (crystal_clock) {
>> +			case
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
>> +				freq = f19_2_mhz;
>> +				break;
>> +			case
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
>> +				freq = f24_mhz;
>> +				break;
>> +			}
>> +		}
>> +
>> +		/* Now figure out how the command stream's timestamp
>> register
>> +		 * increments from this frequency (it might
>> increment only
>> +		 * every few clock cycle).
>> +		 */
>> +		freq >>= 3 - ((rpm_config_reg &
>> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER
>> _MASK) >>
>> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_
>> SHIFT);
> I've been trying to understand this code. What's confusing to me is
> that on gen 9 the frequency shifting only happens for the non-
> DIVIDER_LOGIC case, while here the frequency shifting (apparently)
> applies to both DIVIDER_LOGIC cases since it is outside the if
> statement.
>
> On the other hand, this code here uses "rpm_config_reg" which is 0 for
> the non-DIVIDER_LOGIC case, since we only I915_READ rpm_config_reg
> inside the "else" statement. Either this is a bug, or it's a really
> non-trivial way of writing code and we should just do "freq >>= 3" in
> the first part of the if statement and do the appropriate division in
> the else case.

Hi Paulo,

You're right, this is wrong.
The fix is here : https://patchwork.freedesktop.org/patch/188096/

>
> As a note, in our driver we generally don't zero-initialize variables
> when we don't need. I know that doing zero initialization has its
> advantages, but the great advantage of not zero initializing things is
> that it allows GCC to catch bugs such as the one that's apparently
> happening here. Another thing which we try to do is to limit variables
> to the narrowest scope where they are needed, this way they can't even
> be accessed outside their scope: doing this would also have helped
> preventing the apparent problem we have here.
>
> So: if this code is has a bug, we need to fix it (possibly by reading
> RPM_CONFIG0 outside the if statement). If this code is actually working
> as intended, we need to make it a little less contrived. I spent a long
> time staring at our spec and I'm not sure what should be done.
>
> As an additional suggestion, you could also extract the code that deals
> with the "else" statement to its own function, just like you did with
> read_reference_ts_freq().
>
> Thanks,
> Paulo
>
>> +
>> +		return freq;
>> +	}
>> +
>> +	DRM_ERROR("Unknown gen, unable to compute command stream
>> timestamp frequency\n");
>> +	return 0;
>> +}
>> +
>>   /*
>>    * Determine various intel_device_info fields at runtime.
>>    *
>> @@ -450,6 +552,9 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>>   	else if (INTEL_GEN(dev_priv) >= 10)
>>   		gen10_sseu_info_init(dev_priv);
>>   
>> +	/* Initialize command stream timestamp frequency */
>> +	info->cs_timestamp_frequency =
>> read_timestamp_frequency(dev_priv);
>> +
>>   	DRM_DEBUG_DRIVER("slice mask: %04x\n", info-
>>> sseu.slice_mask);
>>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info-
>>> sseu.slice_mask));
>>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +570,6 @@ void intel_device_info_runtime_init(struct
>> drm_i915_private *dev_priv)
>>   			 info->sseu.has_subslice_pg ? "y" : "n");
>>   	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>   			 info->sseu.has_eu_pg ? "y" : "n");
>> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> +			 info->cs_timestamp_frequency);
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h
>> b/include/uapi/drm/i915_drm.h
>> index 6c02ced663f8..b57985929553 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -481,6 +481,12 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_HAS_CONTEXT_ISOLATION 50
>>   
>> +/* Frequency of the command streamer timestamps given by the
>> *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL
>> onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
>> +
>>   typedef struct drm_i915_getparam {
>>   	__s32 param;
>>   	/*




More information about the Intel-gfx mailing list