[Intel-gfx] [PATCH v14 13/14] drm/i915/perf: reprogram NOA muxes at the beginning of each workload

Lionel Landwerlin lionel.g.landwerlin at intel.com
Tue May 30 18:51:26 UTC 2017


There is a pretty obvious bug in this patch, as some OA configs span
more register writes than a single MI_LOAD_REGISTER_IMM can carry (> 128).
I'll send an updated patch.

That doesn't affect patch 14 though.

-
Lionel

On 26/05/17 12:56, Lionel Landwerlin wrote:
> Dynamic slice/subslice shutdown will effectively lose the NOA
> configuration uploaded to the slices/subslices. When i915 perf is in
> use, we therefore need to reprogram it.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  2 ++
>   drivers/gpu/drm/i915/i915_perf.c        | 64 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_ringbuffer.c |  3 ++
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cd1dc9ee05cb..efa8a0b302ca 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2399,6 +2399,7 @@ struct drm_i915_private {
>   			const struct i915_oa_reg *mux_regs[6];
>   			int mux_regs_lens[6];
>   			int n_mux_configs;
> +			int total_n_mux_regs;
>   
>   			const struct i915_oa_reg *b_counter_regs;
>   			int b_counter_regs_len;
> @@ -3535,6 +3536,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
>   void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>   			    struct i915_gem_context *ctx,
>   			    uint32_t *reg_state);
> +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
>   
>   /* i915_gem_evict.c */
>   int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index c281847eb56b..15b1f92fe5b5 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1438,11 +1438,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
>   	}
>   }
>   
> +static void count_total_mux_regs(struct drm_i915_private *dev_priv)
> +{
> +	int i;
> +
> +	dev_priv->perf.oa.total_n_mux_regs = 0;
> +	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
> +		dev_priv->perf.oa.total_n_mux_regs +=
> +			dev_priv->perf.oa.mux_regs_lens[i];
> +	}
> +}
> +
>   static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
>   {
>   	int ret = i915_oa_select_metric_set_hsw(dev_priv);
>   	int i;
>   
> +	count_total_mux_regs(dev_priv);
> +
>   	if (ret)
>   		return ret;
>   
> @@ -1756,6 +1769,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>   	int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
>   	int i;
>   
> +	count_total_mux_regs(dev_priv);
> +
>   	if (ret)
>   		return ret;
>   
> @@ -2094,6 +2109,55 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>   	gen8_update_reg_state_unlocked(ctx, reg_state);
>   }
>   
> +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
> +{
> +	struct drm_i915_private *dev_priv = req->i915;
> +	u32 *cs;
> +	int i, j;
> +
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> +	if (!IS_GEN(dev_priv, 8, 9))
> +		return 0;
> +
> +	/* Perf not supported or not enabled. */
> +	if (!dev_priv->perf.initialized ||
> +	    !dev_priv->perf.oa.exclusive_stream)
> +		return 0;
> +
> +	cs = intel_ring_begin(req,
> +			      1 /* MI_LOAD_REGISTER_IMM */ +
> +			      dev_priv->perf.oa.total_n_mux_regs * 2 +
> +			      4 /* GDT_CHICKEN_BITS */ +
> +			      1 /* NOOP */);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(dev_priv->perf.oa.total_n_mux_regs);
> +
> +	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
> +	*cs++ = 0xA0;
> +
> +	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
> +		const struct i915_oa_reg *mux_regs =
> +			dev_priv->perf.oa.mux_regs[i];
> +		const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i];
> +
> +		for (j = 0; j < mux_regs_len; j++) {
> +			*cs++ = i915_mmio_reg_offset(mux_regs[j].addr);
> +			*cs++ = mux_regs[j].value;
> +		}
> +	}
> +
> +	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
> +	*cs++ = 0x80;
> +
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
>    * @stream: An i915 perf stream
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index acd1da9b62a3..67aaaebb194b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
>   			!(dispatch_flags & I915_DISPATCH_SECURE);
>   	u32 *cs;
>   
> +	/* Emit NOA config */
> +	i915_oa_emit_noa_config_locked(req);
> +
>   	cs = intel_ring_begin(req, 4);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);




More information about the Intel-gfx mailing list