[Intel-gfx] [PATCH v3 3/4] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Dec 11 12:47:50 UTC 2018
On 11/12/2018 10:14, Ankit Navik wrote:
> From: Praveen Diwakar <praveen.diwakar at intel.com>
>
> This patch will select optimum eu/slice/sub-slice configuration based on
> type of load (low, medium, high) as input.
> Based on our readings and experiments we have predefined set of optimum
> configuration for each platform(CHT, KBL).
> i915_gem_context_set_load_type will select optimum configuration from
> pre-defined optimum configuration table(opt_config).
>
> It also introduce flag update_render_config which can set by any governor.
>
> v2:
> * Move static optimum_config to device init time.
> * Rename function to appropriate name, fix data types and patch ordering.
> * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
>
> v3:
> * Add safe guard check in i915_gem_context_set_load_type.
> * Rename struct from optimum_config to i915_sseu_optimum_config to
> avoid namespace clashes.
> * Reduces memcpy for space efficient.
> * Rebase.
> * Improved commit message. (Tvrtko Ursulin)
>
> Cc: Kedar J Karanje <kedar.j.karanje at intel.com>
> Cc: Yogesh Marathe <yogesh.marathe at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
Again for the record no, I did not r-b this.
> Signed-off-by: Praveen Diwakar <praveen.diwakar at intel.com>
> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar at intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 ++
> drivers/gpu/drm/i915/i915_gem_context.c | 18 ++++++++++++
> drivers/gpu/drm/i915/i915_gem_context.h | 25 +++++++++++++++++
> drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/intel_lrc.c | 4 ++-
> 5 files changed, 94 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4aca534..4b9a8c5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1681,6 +1681,9 @@ struct drm_i915_private {
> struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
> int num_fence_regs; /* 8 on pre-965, 16 otherwise */
>
> + /* optimal slice/subslice/EU configration state */
> + struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];
Make it a pointer to struct i915_sseu_optimum_config.
> +
> unsigned int fsb_freq, mem_freq, is_ddr3;
> unsigned int skl_preferred_vco_freq;
> unsigned int max_cdclk_freq;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d040858..c0ced72 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -392,10 +392,28 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> ctx->subslice_cnt = hweight8(
> INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
> ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> + ctx->load_type = 0;
> + ctx->pending_load_type = 0;
Not needed because we zero the allocation and probably depend on
untouched fields being zero elsewhere.
>
> return ctx;
> }
>
> +
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> + enum gem_load_type type)
> +{
> + struct drm_i915_private *dev_priv = ctx->i915;
> +
> + if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
> + return;
> +
> + /* Call opt_config to get correct configuration for eu,slice,subslice */
> + ctx->slice_cnt = dev_priv->opt_config[type].slice;
> + ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> + ctx->eu_cnt = dev_priv->opt_config[type].eu;
> + ctx->pending_load_type = type;
> +}
> +
> /**
> * i915_gem_context_create_gvt - create a GVT GEM context
> * @dev: drm device *
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index e000530..a0db13c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -53,6 +53,19 @@ struct intel_context_ops {
> void (*destroy)(struct intel_context *ce);
> };
>
> +enum gem_load_type {
> + LOAD_TYPE_LOW,
> + LOAD_TYPE_MEDIUM,
> + LOAD_TYPE_HIGH,
> + LOAD_TYPE_LAST
> +};
> +
> +struct i915_sseu_optimum_config {
> + u8 slice;
> + u8 subslice;
> + u8 eu;
> +};
> +
> /**
> * struct i915_gem_context - client state
> *
> @@ -208,6 +221,16 @@ struct i915_gem_context {
>
> /** eu_cnt: used to set the # of eu to be enabled. */
> u8 eu_cnt;
> +
> + /** load_type: The designated load_type (high/medium/low) for a given
> + * number of pending commands in the command queue.
> + */
> + enum gem_load_type load_type;
> +
> + /** pending_load_type: The earlier load type that the GPU was configured
> + * for (high/medium/low).
> + */
> + enum gem_load_type pending_load_type;
> };
>
> static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> @@ -336,6 +359,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> + enum gem_load_type type);
>
> struct i915_gem_context *
> i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 0ef0c64..33c6310 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -741,6 +741,28 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
> container_of(info, struct drm_i915_private, info);
> enum pipe pipe;
>
> + struct i915_sseu_optimum_config *opt_config = NULL;
> + /* static table of slice/subslice/EU for Cherryview */
> + struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
> + {1, 1, 4}, /* Low */
> + {1, 1, 6}, /* Medium */
> + {1, 2, 6} /* High */
> + };
> +
> + /* static table of slice/subslice/EU for KBL GT2 */
> + struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
> + {1, 3, 2}, /* Low */
> + {1, 3, 4}, /* Medium */
> + {1, 3, 8} /* High */
> + };
> +
> + /* static table of slice/subslice/EU for KBL GT3 */
> + struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
> + {2, 3, 4}, /* Low */
> + {2, 3, 6}, /* Medium */
> + {2, 3, 8} /* High */
> + };
Move these to file scope as static const.
> +
> if (INTEL_GEN(dev_priv) >= 10) {
> for_each_pipe(dev_priv, pipe)
> info->num_scalers[pipe] = 2;
> @@ -840,17 +862,38 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
> /* Initialize slice/subslice/EU info */
> if (IS_HASWELL(dev_priv))
> haswell_sseu_info_init(dev_priv);
> - else if (IS_CHERRYVIEW(dev_priv))
> + else if (IS_CHERRYVIEW(dev_priv)) {
> cherryview_sseu_info_init(dev_priv);
> + opt_config = chv_config;
For my idea to work here and below add:
BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
> + }
> else if (IS_BROADWELL(dev_priv))
> broadwell_sseu_info_init(dev_priv);
> - else if (INTEL_GEN(dev_priv) == 9)
> + else if (INTEL_GEN(dev_priv) == 9) {
> gen9_sseu_info_init(dev_priv);
> +
> + if (IS_KABYLAKE(dev_priv)) {
> + switch (info->gt) {
> + default:
> + MISSING_CASE(info->gt);
This should probably be silent since absence of sseu tuning is not an
error on other platforms.
> + /* fall through */
> + case 2:
> + opt_config = kbl_gt2_config;
> + break;
> + case 3:
> + opt_config = kbl_gt3_config;
Would Kabylake table work for other Gen9 platforms?
> + break;
> + }
> + }
> + }
> else if (INTEL_GEN(dev_priv) == 10)
> gen10_sseu_info_init(dev_priv);
> else if (INTEL_GEN(dev_priv) >= 11)
> gen11_sseu_info_init(dev_priv);
>
> + if (opt_config)
> + memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
> + sizeof(struct i915_sseu_optimum_config));
And if you agree with my other suggestions then you can replace a copy with:
dev_priv->opt_config = opt_config;
Or in fact, even do it directly on assignment sites above.
> +
> /* Initialize command stream timestamp frequency */
> info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a17f676..7fb9cd2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq)
>
> reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
> /* FIXME: To avoid stale rpcs config, move it to context_pin */
> - if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
> + if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
> + (ctx->load_type != ctx->pending_load_type)) {
> rpcs_config = make_rpcs(ctx->i915);
> reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> rpcs_config);
> + ctx->load_type = ctx->pending_load_type;
> }
>
> /* True 32b PPGTT with dynamic page allocation: update PDP
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list