[Intel-gfx] [PATCH v3 1/2] drm/i915/perf: prune OA configs

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Jul 14 14:32:57 UTC 2017


On 14/07/17 13:20, Matthew Auld wrote:
> On 07/14, Lionel Landwerlin wrote:
>> In the following commit we'll introduce loadable userspace
>> configs. This change reworks how configurations are handled in the
>> perf driver and retains only the test configurations in kernel space.
>>
>> We now store the test config in dev_priv and resolve the id only once
>> when opening the perf stream. The OA config is then handled through a
>> pointer to the structure holding the configuration details.
>>
>> v2: Rework how test configs are handled (Lionel)
>>
>> v3: Use u32 to hold number of register (Matthew)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h       |   44 +-
>>   drivers/gpu/drm/i915/i915_oa_bdw.c    | 5360 +--------------------------------
>>   drivers/gpu/drm/i915/i915_oa_bdw.h    |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_bxt.c    | 2623 +---------------
>>   drivers/gpu/drm/i915/i915_oa_bxt.h    |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_chv.c    | 2806 +----------------
>>   drivers/gpu/drm/i915/i915_oa_chv.h    |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_glk.c    | 2535 +---------------
>>   drivers/gpu/drm/i915/i915_oa_glk.h    |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_hsw.c    |  764 +----
>>   drivers/gpu/drm/i915/i915_oa_hsw.h    |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_kblgt2.c | 2971 +-----------------
>>   drivers/gpu/drm/i915/i915_oa_kblgt2.h |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_kblgt3.c | 3020 +------------------
>>   drivers/gpu/drm/i915/i915_oa_kblgt3.h |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_sklgt2.c | 3458 +--------------------
>>   drivers/gpu/drm/i915/i915_oa_sklgt2.h |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_sklgt3.c | 3019 +------------------
>>   drivers/gpu/drm/i915/i915_oa_sklgt3.h |    8 +-
>>   drivers/gpu/drm/i915/i915_oa_sklgt4.c | 3073 +------------------
>>   drivers/gpu/drm/i915/i915_oa_sklgt4.h |    8 +-
>>   drivers/gpu/drm/i915/i915_perf.c      |  309 +-
>>   22 files changed, 504 insertions(+), 29558 deletions(-)
> <SNIP>
>
>> -static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>> +static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
>> +			       const struct i915_oa_config *oa_config)
>>   {
>>   	struct drm_i915_private *dev_priv = req->i915;
>> -	const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs;
>> -	int n_flex_regs = dev_priv->perf.oa.flex_regs_len;
>>   	/* The MMIO offsets for Flex EU registers aren't contiguous */
>>   	u32 flex_mmio[] = {
>>   		i915_mmio_reg_offset(EU_PERF_CNTL0),
>> @@ -1601,11 +1594,14 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>>   	u32 *cs;
>>   	int i;
>>   
>> -	cs = intel_ring_begin(req, n_flex_regs * 2 + 4);
>> +	if (WARN_ON(!oa_config))
>> +		return -EINVAL;
>> +
>> +	cs = intel_ring_begin(req, oa_config->flex_regs_len * 2 + 4);
>>   	if (IS_ERR(cs))
>>   		return PTR_ERR(cs);
>>   
>> -	*cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1);
>> +	*cs++ = MI_LOAD_REGISTER_IMM(oa_config->flex_regs_len + 1);
> Something doesn't add up here, if we don't supply any flex registers, which
> seems to be the case for the oa test configs, or potentially any user provided
> config then we only reserve ring_begin(4)/IMM(1), but we still go off and write
> out the default flex values as per flex_mmio. Also we could potentially over
> reserve since we can only ever write out at most flex_mmio registers.
>
> My first thought is perhaps:
>
> int flex_max = ARRAY_SIZE(flex_mmio);
>
> GEM_BUG_ON(flex_regs_len > flex_max);
>
> cs = intel_ring_begin(req, flex_max * 2 + 4);
>
> *cs++ = MI_LOAD_REGISTER_IMM(flex_max + 1);
>
> for (i = 0; i < flex_max; i++) {
>      
>      for (...) {
> 	if (addr == mmio) {
> 		value = ...
> 		++count;
> 	}
>      }
> }
>
> GEM_BUG_ON(count != flex_regs_len);

Thanks for pointing this out, there is indeed a problem.

>
>>   
>>   	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>>   	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>> @@ -1624,9 +1620,9 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>>   		u32 value = 0;
>>   		int j;
>>   
>> -		for (j = 0; j < n_flex_regs; j++) {
>> -			if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) {
>> -				value = flex_regs[j].value;
>> +		for (j = 0; j < oa_config->flex_regs_len; j++) {
>> +			if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
>> +				value = oa_config->flex_regs[j].value;
>>   				break;
>>   			}
>>   		}
>> @@ -1641,7 +1637,8 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>>   	return 0;
>>   }
>>   
>> -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv)
>> +static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
>> +						 const struct i915_oa_config *oa_config)
>>   {
>>   	struct intel_engine_cs *engine = dev_priv->engine[RCS];
>>   	struct i915_gem_timeline *timeline;
>> @@ -1656,10 +1653,12 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>>   	if (IS_ERR(req))
>>   		return PTR_ERR(req);
>>   
>> -	ret = gen8_emit_oa_config(req);
>> -	if (ret) {
>> -		i915_add_request(req);
>> -		return ret;
>> +	if (oa_config) {
>> +		ret = gen8_emit_oa_config(req, oa_config);
>> +		if (ret) {
>> +			i915_add_request(req);
>> +			return ret;
>> +		}
>>   	}
>>   
>>   	/* Queue this switch after all other activity */
>> @@ -1707,6 +1706,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>>    * Note: it's only the RCS/Render context that has any OA state.
>>    */
>>   static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> +				       const struct i915_oa_config *oa_config,
>>   				       bool interruptible)
>>   {
>>   	struct i915_gem_context *ctx;
>> @@ -1724,7 +1724,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>>   	}
>>   
>>   	/* Switch away from any user context. */
>> -	ret = gen8_switch_to_updated_kernel_context(dev_priv);
>> +	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
>>   	if (ret)
>>   		goto out;
>>   
>> @@ -1763,7 +1763,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>>   		ce->state->obj->mm.dirty = true;
>>   		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>>   
>> -		gen8_update_reg_state_unlocked(ctx, regs);
>> +		if (oa_config)
>> +			gen8_update_reg_state_unlocked(ctx, regs, oa_config);
>>   
>>   		i915_gem_object_unpin_map(ce->state->obj);
>>   	}
>> @@ -1774,13 +1775,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>>   	return ret;
>>   }
>>   
>> -static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>> +static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>> +				  const struct i915_oa_config *oa_config)
>>   {
>> -	int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
>> -	int i;
>> -
>> -	if (ret)
>> -		return ret;
>> +	int ret;
>>   
>>   	/*
>>   	 * We disable slice/unslice clock ratio change reports on SKL since
>> @@ -1817,19 +1815,18 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>>   	 * to make sure all slices/subslices are ON before writing to NOA
>>   	 * registers.
>>   	 */
>> -	ret = gen8_configure_all_contexts(dev_priv, true);
>> +	ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
>>   	if (ret)
>>   		return ret;
>>   
>>   	I915_WRITE(GDT_CHICKEN_BITS, 0xA0);
>> -	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
>> -		config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i],
>> -			       dev_priv->perf.oa.mux_regs_lens[i]);
>> -	}
>> +
>> +	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
>> +
>>   	I915_WRITE(GDT_CHICKEN_BITS, 0x80);
>>   
>> -	config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs,
>> -		       dev_priv->perf.oa.b_counter_regs_len);
>> +	config_oa_regs(dev_priv, oa_config->b_counter_regs,
>> +		       oa_config->b_counter_regs_len);
>>   
>>   	return 0;
>>   }
>> @@ -1837,7 +1834,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>>   static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>>   {
>>   	/* Reset all contexts' slices/subslices configurations. */
>> -	gen8_configure_all_contexts(dev_priv, false);
>> +	gen8_configure_all_contexts(dev_priv, NULL, false);
>>   }
>>   
>>   static void gen7_oa_enable(struct drm_i915_private *dev_priv)
>> @@ -2011,8 +2008,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>   		return -EBUSY;
>>   	}
>>   
>> -	if (!props->metrics_set) {
>> -		DRM_DEBUG("OA metric set not specified\n");
>> +	if (!props->oa_config) {
>> +		DRM_DEBUG("OA configuration set not found/specified\n");
>>   		return -EINVAL;
>>   	}
>>   
>> @@ -2055,7 +2052,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>   	dev_priv->perf.oa.oa_buffer.format =
>>   		dev_priv->perf.oa.oa_formats[props->oa_format].format;
>>   
>> -	dev_priv->perf.oa.metrics_set = props->metrics_set;
>> +	stream->oa_config = props->oa_config;
>>   
>>   	dev_priv->perf.oa.periodic = props->oa_periodic;
>>   	if (dev_priv->perf.oa.periodic)
>> @@ -2086,7 +2083,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>   	if (ret)
>>   		goto err_oa_buf_alloc;
>>   
>> -	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
>> +	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> +						      stream->oa_config);
>>   	if (ret)
>>   		goto err_enable;
>>   
>> @@ -2113,6 +2111,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>>   			    u32 *reg_state)
>>   {
>>   	struct drm_i915_private *dev_priv = engine->i915;
>> +	struct i915_oa_config *oa_config = dev_priv->perf.oa.current_config;
> I'm confused, isn't current_config always NULL, where do we set it exactly?
> Maybe I'm missing something. Also why not just perf.exclusive_stream->oa_config,
> is that not the current config?
>
>
Oh dear, screwed up my rebase...

current_config should have been removed.



More information about the Intel-gfx mailing list