[Intel-gfx] [PATCH v6 2/3] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Nov 26 10:41:43 UTC 2019
On 26/11/2019 04:51, Ankit Navik wrote:
> This patch will select optimum eu/slice/sub-slice configuration based on
> type of load (low, medium, high) as input.
> Based on our readings and experiments we have predefined set of optimum
> configuration for each platform(CHT, KBL).
> i915_gem_context_set_load_type will select optimum configuration from
> pre-defined optimum configuration table(opt_config).
>
> It also introduce flag update_render_config which can set by any governor.
>
> v2:
> * Move static optimum_config to device init time.
> * Rename function to appropriate name, fix data types and patch ordering.
> * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
>
> v3:
> * Add safe guard check in i915_gem_context_set_load_type.
> * Rename struct from optimum_config to i915_sseu_optimum_config to
> avoid namespace clashes.
> * Reduces memcpy for space efficient.
> * Rebase.
> * Improved commit message. (Tvrtko Ursulin)
>
> v4:
> * Move optimum config table to file scope. (Tvrtko Ursulin)
>
> v5:
> * Adds optimal table of slice/sub-slice/EU for Gen 9 GT1.
> * Rebase.
>
> v6:
> * Rebase.
> * Fix warnings.
>
> Cc: Vipin Anand <vipin.anand at intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik at intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_context.c | 18 +++++++
> drivers/gpu/drm/i915/gem/i915_gem_context.h | 2 +
> drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 32 ++++++++++++
> drivers/gpu/drm/i915/gt/intel_lrc.c | 42 ++++++++++++++-
> drivers/gpu/drm/i915/i915_drv.h | 5 ++
> drivers/gpu/drm/i915/intel_device_info.c | 62 ++++++++++++++++++++++-
> 6 files changed, 157 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 8288fb9..ac94f92 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -713,10 +713,28 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
>
> trace_i915_context_create(ctx);
> atomic_set(&ctx->req_cnt, 0);
> + ctx->slice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.slice_mask);
> + ctx->subslice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.subslice_mask[0]);
> + ctx->eu_cnt = RUNTIME_INFO(i915)->sseu.eu_per_subslice;
I wanted to say that you need to wrap these into a named structure, which
would make it clear these members are about Dynamic EU, but let me finish
reading first; there might be a better way.
>
> return ctx;
> }
>
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> + enum gem_load_type type)
> +{
> + struct drm_i915_private *dev_priv = ctx->i915;
> +
> + if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
> + return;
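This should be ">=" I think, or bad things can happen: the opt_config
tables have LOAD_TYPE_LAST entries, so type == LOAD_TYPE_LAST would index
one past the end.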
> +
> + /* Call opt_config to get correct configuration for eu,slice,subslice */
> + ctx->slice_cnt = dev_priv->opt_config[type].slice;
> + ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> + ctx->eu_cnt = dev_priv->opt_config[type].eu;
> + ctx->pending_load_type = type;
> +}
> +
> static void
> destroy_kernel_context(struct i915_gem_context **ctxp)
> {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 18e50a7..8677427 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -177,6 +177,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> + enum gem_load_type type);
>
> struct i915_gem_context *
> i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index 3931c06..6847d49 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -40,6 +40,19 @@ struct i915_gem_engines_iter {
> const struct i915_gem_engines *engines;
> };
>
> +enum gem_load_type {
> + LOAD_TYPE_LOW,
> + LOAD_TYPE_MEDIUM,
> + LOAD_TYPE_HIGH,
> + LOAD_TYPE_LAST
> +};
> +
> +struct i915_sseu_optimum_config {
> + u8 slice;
> + u8 subslice;
> + u8 eu;
> +};
> +
> /**
> * struct i915_gem_context - client state
> *
> @@ -173,6 +186,25 @@ struct i915_gem_context {
> */
> atomic_t req_cnt;
>
> + /** slice_cnt: used to set the # of slices to be enabled. */
> + u8 slice_cnt;
> +
> + /** subslice_cnt: used to set the # of subslices to be enabled. */
> + u8 subslice_cnt;
> +
> + /** eu_cnt: used to set the # of eu to be enabled. */
> + u8 eu_cnt;
> +
> + /** load_type: The designated load_type (high/medium/low) for a given
> + * number of pending commands in the command queue.
> + */
> + enum gem_load_type load_type;
> +
> + /** pending_load_type: The earlier load type that the GPU was configured
> + * for (high/medium/low).
> + */
> + enum gem_load_type pending_load_type;
> +
> /** jump_whitelist: Bit array for tracking cmds during cmdparsing
> * Guarded by struct_mutex
> */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 511d5a1..c3f279e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2436,6 +2436,36 @@ static void execlists_context_unpin(struct intel_context *ce)
> intel_ring_reset(ce->ring, ce->ring->tail);
> }
>
> +static u32
> +get_context_rpcs_config(struct i915_gem_context *ctx)
> +{
> + u32 rpcs = 0;
> + struct drm_i915_private *dev_priv = ctx->i915;
> +
> + if (INTEL_GEN(dev_priv) < 8)
> + return 0;
> +
> + if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
> + rpcs |= GEN8_RPCS_S_CNT_ENABLE;
> + rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT;
> + rpcs |= GEN8_RPCS_ENABLE;
> + }
> +
> + if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) {
> + rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> + rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT;
> + rpcs |= GEN8_RPCS_ENABLE;
> + }
> +
> + if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
> + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT;
> + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT;
> + rpcs |= GEN8_RPCS_ENABLE;
> + }
> +
> + return rpcs;
> +}
> +
> static void
> __execlists_update_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine)
> @@ -2452,8 +2482,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
>
> /* RPCS */
> if (engine->class == RENDER_CLASS) {
> - regs[CTX_R_PWR_CLK_STATE] =
> - intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> + if (engine->i915->predictive_load_enable) {
> + regs[CTX_R_PWR_CLK_STATE] =
> + get_context_rpcs_config(ce->gem_context);
You cannot do it like this because you break OA and Gen11 user
configured SSEU.
You need to have intel_sseu_make_rpcs below be the central decision
maker on what is the correct SSEU config to apply.
Order of precedence should be:
1. OA compatible configuration
2. User requested configuration (via context set param)
3. Dynamic SSEU suggested configuration
4. Defaults
Perhaps we need a marker on ce saying that a user configuration has been
set, and then this code can remain as it was; when you are applying the
Dynamic SSEU settings you would just do something like:
if (!ce->user_sseu_set)
ce->sseu = make_dynamic_sseu(your optimal config);
I'll explain in the next patch what I am thinking in more detail.
Regards,
Tvrtko
> + } else {
> + regs[CTX_R_PWR_CLK_STATE] =
> + intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> + }
>
> i915_oa_init_reg_state(ce, engine);
> }
> @@ -2485,6 +2520,9 @@ __execlists_context_pin(struct intel_context *ce,
> ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> __execlists_update_reg_state(ce, engine);
>
> + if (ce->gem_context->load_type != ce->gem_context->pending_load_type)
> + ce->gem_context->load_type = ce->gem_context->pending_load_type;
> +
> return 0;
>
> unpin_active:
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fdae5a9..3064ddf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -999,6 +999,11 @@ struct drm_i915_private {
> /* protects panel power sequencer state */
> struct mutex pps_mutex;
>
> + /* optimal slice/subslice/EU configration state */
> + struct i915_sseu_optimum_config *opt_config;
> +
> + int predictive_load_enable;
> +
> unsigned int fsb_freq, mem_freq, is_ddr3;
> unsigned int skl_preferred_vco_freq;
> unsigned int max_cdclk_freq;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index a5b5713..b3c2f92 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -873,6 +873,34 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
> RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
> }
>
> +/* static table of slice/subslice/EU for Cherryview */
> +static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
> + {1, 1, 4}, /* Low */
> + {1, 1, 6}, /* Medium */
> + {1, 2, 6} /* High */
> +};
> +
> +/* static table of slice/subslice/EU for GLK GT1 */
> +static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = {
> + {1, 2, 2}, /* Low */
> + {1, 2, 3}, /* Medium */
> + {1, 2, 6} /* High */
> +};
> +
> +/* static table of slice/subslice/EU for KBL GT2 */
> +static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
> + {1, 3, 2}, /* Low */
> + {1, 3, 4}, /* Medium */
> + {1, 3, 8} /* High */
> +};
> +
> +/* static table of slice/subslice/EU for KBL GT3 */
> +static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
> + {2, 3, 4}, /* Low */
> + {2, 3, 6}, /* Medium */
> + {2, 3, 8} /* High */
> +};
> +
> /**
> * intel_device_info_runtime_init - initialize runtime info
> * @dev_priv: the i915 device
> @@ -894,6 +922,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
> struct intel_device_info *info = mkwrite_device_info(dev_priv);
> struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
> enum pipe pipe;
> + struct i915_sseu_optimum_config *opt_config = NULL;
>
> if (INTEL_GEN(dev_priv) >= 10) {
> for_each_pipe(dev_priv, pipe)
> @@ -999,12 +1028,38 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
> /* Initialize slice/subslice/EU info */
> if (IS_HASWELL(dev_priv))
> haswell_sseu_info_init(dev_priv);
> - else if (IS_CHERRYVIEW(dev_priv))
> + else if (IS_CHERRYVIEW(dev_priv)) {
> cherryview_sseu_info_init(dev_priv);
> + opt_config = (struct i915_sseu_optimum_config *)chv_config;
> + BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
> + }
> else if (IS_BROADWELL(dev_priv))
> broadwell_sseu_info_init(dev_priv);
> - else if (IS_GEN(dev_priv, 9))
> + else if (IS_GEN(dev_priv, 9)) {
> gen9_sseu_info_init(dev_priv);
> +
> + switch (info->gt) {
> + default: /* fall through */
> + case 1:
> + opt_config = (struct i915_sseu_optimum_config *)
> + glk_gt1_config;
> + BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config)
> + != LOAD_TYPE_LAST);
> + break;
> + case 2:
> + opt_config = (struct i915_sseu_optimum_config *)
> + kbl_gt2_config;
> + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
> + != LOAD_TYPE_LAST);
> + break;
> + case 3:
> + opt_config = (struct i915_sseu_optimum_config *)
> + kbl_gt3_config;
> + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
> + != LOAD_TYPE_LAST);
> + break;
> + }
> + }
> else if (IS_GEN(dev_priv, 10))
> gen10_sseu_info_init(dev_priv);
> else if (IS_GEN(dev_priv, 11))
> @@ -1017,6 +1072,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
> info->ppgtt_type = INTEL_PPGTT_NONE;
> }
>
> + if (opt_config)
> + dev_priv->opt_config = opt_config;
> +
> /* Initialize command stream timestamp frequency */
> runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
> }
>
More information about the Intel-gfx
mailing list