[Intel-gfx] [PATCH v6 2/3] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Ankit Navik
ankit.p.navik at intel.com
Mon Nov 25 12:17:08 UTC 2019
This patch selects the optimum EU/slice/sub-slice configuration based on
the type of load (low, medium, high) given as input.
Based on our readings and experiments, we have a predefined set of optimum
configurations for each platform (CHT, KBL).
i915_gem_context_set_load_type() selects the optimum configuration from the
predefined optimum configuration table (opt_config).
It also introduces the flag update_render_config, which can be set by any governor.
v2:
* Move static optimum_config to device init time.
* Rename function to appropriate name, fix data types and patch ordering.
* Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
v3:
* Add safeguard check in i915_gem_context_set_load_type.
* Rename struct from optimum_config to i915_sseu_optimum_config to
avoid namespace clashes.
* Reduce memcpy usage for space efficiency.
* Rebase.
* Improved commit message. (Tvrtko Ursulin)
v4:
* Move optimum config table to file scope. (Tvrtko Ursulin)
v5:
* Adds optimal table of slice/sub-slice/EU for Gen 9 GT1.
* Rebase.
v6:
* Rebase.
Cc: Vipin Anand <vipin.anand at intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik at intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 19 ++++++++
drivers/gpu/drm/i915/gem/i915_gem_context.h | 2 +
drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 32 +++++++++++++
drivers/gpu/drm/i915/gt/intel_lrc.c | 42 +++++++++++++++-
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/intel_device_info.c | 58 ++++++++++++++++++++++-
6 files changed, 154 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ef33985..5d6cf0a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -712,10 +712,29 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
trace_i915_context_create(ctx);
atomic_set(&ctx->req_cnt, 0);
+ ctx->slice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.slice_mask);
+ ctx->subslice_cnt = hweight8(
+ RUNTIME_INFO(i915)->sseu.subslice_mask[0]);
+ ctx->eu_cnt = RUNTIME_INFO(i915)->sseu.eu_per_subslice;
return ctx;
}
+/**
+ * i915_gem_context_set_load_type - apply the sseu configuration for a load type
+ * @ctx: context whose slice/subslice/EU counts are updated
+ * @type: load type used to index the device's optimum configuration table
+ *
+ * Copies the slice, subslice and EU counts for @type from
+ * ctx->i915->opt_config into @ctx and records @type as the context's
+ * pending load type. Does nothing if @type is out of range or if the device
+ * has no optimum configuration table.
+ */
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+				    enum gem_load_type type)
+{
+	struct drm_i915_private *dev_priv = ctx->i915;
+	const struct i915_sseu_optimum_config *cfg;
+
+	/*
+	 * LOAD_TYPE_LAST is the number of load types, i.e. one past the last
+	 * valid table index, so it has to be rejected as well.
+	 */
+	if (GEM_WARN_ON(type >= LOAD_TYPE_LAST))
+		return;
+
+	/* opt_config is only assigned on platforms that provide a table. */
+	if (!dev_priv->opt_config)
+		return;
+
+	cfg = &dev_priv->opt_config[type];
+	ctx->slice_cnt = cfg->slice;
+	ctx->subslice_cnt = cfg->subslice;
+	ctx->eu_cnt = cfg->eu;
+	ctx->pending_load_type = type;
+}
+
static void
destroy_kernel_context(struct i915_gem_context **ctxp)
{
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 18e50a7..8677427 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -177,6 +177,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+/* Set @ctx's slice/subslice/EU counts from the device's table entry for @type. */
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+ enum gem_load_type type);
struct i915_gem_context *
i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 3931c06..6847d49 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -40,6 +40,19 @@ struct i915_gem_engines_iter {
const struct i915_gem_engines *engines;
};
+/* Load levels; also the index into the per-platform optimum sseu tables. */
+enum gem_load_type {
+	LOAD_TYPE_LOW = 0,
+	LOAD_TYPE_MEDIUM,
+	LOAD_TYPE_HIGH,
+	/* Count of the load levels above; used as the length of the tables. */
+	LOAD_TYPE_LAST
+};
+
+/*
+ * One slice/subslice/EU count triple. i915_gem_context_set_load_type()
+ * copies these into the context's slice_cnt, subslice_cnt and eu_cnt.
+ */
+struct i915_sseu_optimum_config {
+ u8 slice;	/* number of slices */
+ u8 subslice;	/* number of subslices */
+ u8 eu;		/* number of EUs */
+};
+
/**
* struct i915_gem_context - client state
*
@@ -173,6 +186,25 @@ struct i915_gem_context {
*/
atomic_t req_cnt;
+ /**
+  * slice_cnt: number of slices to enable for this context. Seeded from
+  * the device slice mask at context creation and overwritten by
+  * i915_gem_context_set_load_type().
+  */
+ u8 slice_cnt;
+
+ /**
+  * subslice_cnt: number of subslices to enable for this context.
+  * Seeded from subslice_mask[0] at context creation and overwritten by
+  * i915_gem_context_set_load_type().
+  */
+ u8 subslice_cnt;
+
+ /**
+  * eu_cnt: number of EUs to enable for this context; written to both
+  * the EU min and EU max fields by get_context_rpcs_config().
+  */
+ u8 eu_cnt;
+
+ /**
+  * load_type: load type (high/medium/low) last latched for this
+  * context; copied from pending_load_type when the context is pinned.
+  */
+ enum gem_load_type load_type;
+
+ /**
+  * pending_load_type: load type (high/medium/low) most recently
+  * requested through i915_gem_context_set_load_type(); it becomes
+  * load_type at the next context pin.
+  */
+ enum gem_load_type pending_load_type;
+
/** jump_whitelist: Bit array for tracking cmds during cmdparsing
* Guarded by struct_mutex
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bf2c7a5..b639d24 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2429,6 +2429,36 @@ static void execlists_context_unpin(struct intel_context *ce)
intel_ring_reset(ce->ring, ce->ring->tail);
}
+/*
+ * Build the RPCS value for a context from its slice_cnt, subslice_cnt and
+ * eu_cnt. Only the fields whose power-gating capability the device reports
+ * (has_slice_pg / has_subslice_pg / has_eu_pg) are filled in, and
+ * GEN8_RPCS_ENABLE is set whenever at least one of them is. Returns 0 on
+ * hardware older than gen8.
+ */
+static u32
+get_context_rpcs_config(struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	u32 val = 0;
+
+	if (INTEL_GEN(i915) < 8)
+		return 0;
+
+	/* Slice count request. */
+	if (RUNTIME_INFO(i915)->sseu.has_slice_pg)
+		val |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE |
+		       (ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT);
+
+	/* Subslice count request. */
+	if (RUNTIME_INFO(i915)->sseu.has_subslice_pg)
+		val |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE |
+		       (ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT);
+
+	/* The same EU count is used for both the minimum and the maximum. */
+	if (RUNTIME_INFO(i915)->sseu.has_eu_pg)
+		val |= GEN8_RPCS_ENABLE |
+		       (ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT) |
+		       (ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT);
+
+	return val;
+}
+
static void
__execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine)
@@ -2445,8 +2475,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
/* RPCS */
if (engine->class == RENDER_CLASS) {
- regs[CTX_R_PWR_CLK_STATE] =
- intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+ if(engine->i915->predictive_load_enable) {
+ regs[CTX_R_PWR_CLK_STATE] =
+ get_context_rpcs_config(ce->gem_context);
+ } else {
+ regs[CTX_R_PWR_CLK_STATE] =
+ intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+ }
i915_oa_init_reg_state(ce, engine);
}
@@ -2478,6 +2513,9 @@ __execlists_context_pin(struct intel_context *ce,
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
+ if (ce->gem_context->load_type != ce->gem_context->pending_load_type)
+ ce->gem_context->load_type = ce->gem_context->pending_load_type;
+
return 0;
unpin_active:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fdae5a9..3064ddf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -999,6 +999,11 @@ struct drm_i915_private {
/* protects panel power sequencer state */
struct mutex pps_mutex;
+	/*
+	 * Optimal slice/subslice/EU configuration table for this device,
+	 * indexed by enum gem_load_type. It only ever points at one of the
+	 * static const per-platform tables, hence the const qualifier, and is
+	 * not assigned on platforms that have no table.
+	 */
+	const struct i915_sseu_optimum_config *opt_config;
+
+	/*
+	 * When non-zero, the render engine's RPCS value is built from the
+	 * per-context slice/subslice/EU counts instead of the context's sseu.
+	 */
+	int predictive_load_enable;
+
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index a5b5713..26dedfe 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -873,6 +873,34 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
}
+/* Cherryview: slice/subslice/EU counts to use for each load type */
+static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+	[LOAD_TYPE_LOW]    = { .slice = 1, .subslice = 1, .eu = 4 },
+	[LOAD_TYPE_MEDIUM] = { .slice = 1, .subslice = 1, .eu = 6 },
+	[LOAD_TYPE_HIGH]   = { .slice = 1, .subslice = 2, .eu = 6 },
+};
+
+/* GLK GT1: slice/subslice/EU counts to use for each load type */
+static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = {
+	[LOAD_TYPE_LOW]    = { .slice = 1, .subslice = 2, .eu = 2 },
+	[LOAD_TYPE_MEDIUM] = { .slice = 1, .subslice = 2, .eu = 3 },
+	[LOAD_TYPE_HIGH]   = { .slice = 1, .subslice = 2, .eu = 6 },
+};
+
+/* KBL GT2: slice/subslice/EU counts to use for each load type */
+static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+	[LOAD_TYPE_LOW]    = { .slice = 1, .subslice = 3, .eu = 2 },
+	[LOAD_TYPE_MEDIUM] = { .slice = 1, .subslice = 3, .eu = 4 },
+	[LOAD_TYPE_HIGH]   = { .slice = 1, .subslice = 3, .eu = 8 },
+};
+
+/* KBL GT3: slice/subslice/EU counts to use for each load type */
+static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+	[LOAD_TYPE_LOW]    = { .slice = 2, .subslice = 3, .eu = 4 },
+	[LOAD_TYPE_MEDIUM] = { .slice = 2, .subslice = 3, .eu = 6 },
+	[LOAD_TYPE_HIGH]   = { .slice = 2, .subslice = 3, .eu = 8 },
+};
+
/**
* intel_device_info_runtime_init - initialize runtime info
* @dev_priv: the i915 device
@@ -894,6 +922,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
enum pipe pipe;
+ struct i915_sseu_optimum_config *opt_config = NULL;
if (INTEL_GEN(dev_priv) >= 10) {
for_each_pipe(dev_priv, pipe)
@@ -999,12 +1028,34 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
/* Initialize slice/subslice/EU info */
if (IS_HASWELL(dev_priv))
haswell_sseu_info_init(dev_priv);
- else if (IS_CHERRYVIEW(dev_priv))
+ else if (IS_CHERRYVIEW(dev_priv)) {
cherryview_sseu_info_init(dev_priv);
+ opt_config = chv_config;
+ BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
+ }
else if (IS_BROADWELL(dev_priv))
broadwell_sseu_info_init(dev_priv);
- else if (IS_GEN(dev_priv, 9))
+ else if (IS_GEN(dev_priv, 9)) {
gen9_sseu_info_init(dev_priv);
+
+		/*
+		 * Pick the optimum configuration table by GT level. Every
+		 * case must end in a break: without one after the GT1 case,
+		 * control runs on into the GT2 case and the GT1 table is
+		 * never used.
+		 */
+		switch (info->gt) {
+		default: /* any other GT level shares the GT1 table */
+		case 1:
+			opt_config = glk_gt1_config;
+			BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config)
+				     != LOAD_TYPE_LAST);
+			break;
+		case 2:
+			opt_config = kbl_gt2_config;
+			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
+				     != LOAD_TYPE_LAST);
+			break;
+		case 3:
+			opt_config = kbl_gt3_config;
+			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
+				     != LOAD_TYPE_LAST);
+			break;
+		}
+ }
else if (IS_GEN(dev_priv, 10))
gen10_sseu_info_init(dev_priv);
else if (IS_GEN(dev_priv, 11))
@@ -1017,6 +1068,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
info->ppgtt_type = INTEL_PPGTT_NONE;
}
+ if (opt_config)
+ dev_priv->opt_config = opt_config;
+
/* Initialize command stream timestamp frequency */
runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
}
--
2.7.4
More information about the Intel-gfx
mailing list