[PATCH 2/2] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Apr 16 12:14:51 UTC 2020
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
This patch selects the optimum EU/slice/sub-slice configuration based on
the type of load (low, medium, high) given as input.
Based on our readings and experiments we have a predefined set of optimum
configurations for each platform (CHT, KBL).
i915_gem_context_set_load_type will select the optimum configuration from
the pre-defined optimum configuration table (opt_config).
It also introduces the flag update_render_config, which can be set by any governor.
v2:
* Move static optimum_config to device init time.
* Rename function to appropriate name, fix data types and patch ordering.
* Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
v3:
* Add safeguard check in i915_gem_context_set_load_type.
* Rename struct from optimum_config to i915_sseu_optimum_config to
avoid namespace clashes.
* Reduce memcpy usage for space efficiency.
* Rebase.
* Improved commit message. (Tvrtko Ursulin)
v4:
* Move optimum config table to file scope. (Tvrtko Ursulin)
v5:
* Adds optimal table of slice/sub-slice/EU for Gen 9 GT1.
* Rebase.
v6:
* Rebase.
* Fix warnings.
v7:
* Fix return conditions.
* Remove i915_gem_context_set_load_type and move logic to
__execlists_update_reg_state. (Tvrtko Ursulin)
Tvrtko Ursulin:
v8:
* Simplified and mashed up - TODO
Cc: Vipin Anand <vipin.anand at intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik at intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
.../gpu/drm/i915/gem/i915_gem_context_types.h | 13 ++++
drivers/gpu/drm/i915/gt/intel_context.c | 1 +
drivers/gpu/drm/i915/gt/intel_context.h | 2 +
drivers/gpu/drm/i915/gt/intel_context_sseu.c | 65 ++++++++++++++++++-
drivers/gpu/drm/i915/gt/intel_context_types.h | 4 ++
drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +-
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_sysfs.c | 34 ++++++++++
drivers/gpu/drm/i915/intel_device_info.c | 57 +++++++++++++++-
9 files changed, 181 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 6baeaae68dd6..05e10f930a78 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -46,6 +46,19 @@ struct i915_gem_engines_iter {
const struct i915_gem_engines *engines;
};
+enum gem_load_type {
+ LOAD_TYPE_LOW,
+ LOAD_TYPE_MEDIUM,
+ LOAD_TYPE_HIGH,
+ LOAD_TYPE_LAST /* number of load types; sizes the optimum config tables */
+};
+
+struct i915_sseu_optimum_config {
+ u8 slice; /* slice count; expanded to a mask of that many low bits */
+ u8 subslice; /* subslice count; expanded to a mask of that many low bits */
+ u8 eu; /* EUs per subslice; applied as both the min and the max */
+};
+
/**
* struct i915_gem_context - client state
*
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index e4aece20bc80..527cde83046c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -291,6 +291,7 @@ intel_context_init(struct intel_context *ce,
ce->engine = engine;
ce->ops = engine->cops;
ce->sseu = engine->sseu;
+ ce->last_sseu = engine->sseu;
ce->ring = __intel_context_ring_size(SZ_4K);
ewma_runtime_init(&ce->runtime.avg);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 07be021882cc..64b879525971 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -249,4 +249,6 @@ static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
}
+struct intel_sseu *intel_sseu_make(struct intel_context *ce);
+
#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index 57a30956c922..cf834279e8da 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -89,10 +89,73 @@ intel_context_reconfigure_sseu(struct intel_context *ce,
goto unlock;
ret = gen8_modify_rpcs(ce, sseu);
- if (!ret)
+ if (!ret) {
ce->sseu = sseu;
+ __set_bit(CONTEXT_USER_SSEU, &ce->flags);
+ }
unlock:
intel_context_unlock_pinned(ce);
return ret;
}
+
+/* Number of pending requests at which a context counts as MEDIUM load. */
+#define PENDING_THRESHOLD_MEDIUM 3
+
+/*
+ * Pick the sseu to program for render context @ce: ce->sseu if it has no GEM
+ * context, prediction is disabled or CONTEXT_USER_SSEU is set, else
+ * ce->last_sseu refreshed from opt_config (above threshold HIGH, below LOW).
+ */
+struct intel_sseu *intel_sseu_make(struct intel_context *ce)
+{
+ const unsigned int class = ce->engine->class;
+ const struct i915_sseu_optimum_config *cfg;
+ struct intel_sseu *sseu = &ce->last_sseu;
+ struct i915_gem_context *ctx;
+ enum gem_load_type load_type;
+ unsigned int pending;
+
+ GEM_BUG_ON(class != RENDER_CLASS);
+
+ ctx = rcu_dereference_protected(ce->gem_context, true);
+ if (!ctx || !ctx->i915->predictive_load_enable ||
+ test_bit(CONTEXT_USER_SSEU, &ce->flags))
+ return &ce->sseu;
+
+ pending = atomic_read(&ctx->rq_queued[class]) +
+ atomic_read(&ctx->rq_runnable[class]);
+ /*
+ * Transitioning to low state whenever pending request is zero
+ * would cause vacillation between low and high state.
+ */
+ if (pending == 0)
+ return sseu;
+
+ if (pending > PENDING_THRESHOLD_MEDIUM)
+ load_type = LOAD_TYPE_HIGH;
+ else if (pending == PENDING_THRESHOLD_MEDIUM)
+ load_type = LOAD_TYPE_MEDIUM;
+ else
+ load_type = LOAD_TYPE_LOW;
+
+ cfg = &ctx->i915->opt_config[load_type];
+
+ sseu->slice_mask = ~(~0UL << cfg->slice);
+ sseu->subslice_mask = ~(~0UL << cfg->subslice);
+ sseu->min_eus_per_subslice = cfg->eu;
+ sseu->max_eus_per_subslice = cfg->eu;
+
+#if 1
+ printk("dyn_sseu: %p pending=%u load_type=%u sseu=%x/%x/%u-%u [%s]\n",
+ ce,
+ pending,
+ load_type,
+ sseu->slice_mask, sseu->subslice_mask,
+ sseu->min_eus_per_subslice,
+ sseu->max_eus_per_subslice,
+ ctx->name);
+#endif
+
+ return sseu;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..0fda6373914d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -67,6 +67,7 @@ struct intel_context {
#define CONTEXT_BANNED 5
#define CONTEXT_FORCE_SINGLE_SUBMISSION 6
#define CONTEXT_NOPREEMPT 7
+#define CONTEXT_USER_SSEU 8
u32 *lrc_reg_state;
u64 lrc_desc;
@@ -96,6 +97,9 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+
+ /** last_sseu: Previous dynamic sseu */
+ struct intel_sseu last_sseu;
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0cd56effc1e7..6990f5cb767b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -235,7 +235,7 @@ static void execlists_init_reg_state(u32 *reg_state,
const struct intel_ring *ring,
bool close);
static void
-__execlists_update_reg_state(const struct intel_context *ce,
+__execlists_update_reg_state(struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
@@ -3149,7 +3149,7 @@ static void execlists_context_unpin(struct intel_context *ce)
}
static void
-__execlists_update_reg_state(const struct intel_context *ce,
+__execlists_update_reg_state(struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head)
{
@@ -3167,7 +3167,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
/* RPCS */
if (engine->class == RENDER_CLASS) {
regs[CTX_R_PWR_CLK_STATE] =
- intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+ intel_sseu_make_rpcs(engine->i915, intel_sseu_make(ce));
i915_oa_init_reg_state(ce, engine);
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e256b6be5d79..aa8c10764197 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -927,6 +927,11 @@ struct drm_i915_private {
/* protects panel power sequencer state */
struct mutex pps_mutex;
+ /* optimal slice/subslice/EU configuration state */
+ const struct i915_sseu_optimum_config *opt_config;
+
+ bool predictive_load_enable;
+
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 45d32ef42787..0165185bc6a5 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -433,12 +433,45 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
return ret ?: count;
}
+static ssize_t deu_enable_show(struct device *kdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", i915->predictive_load_enable);
+}
+
+static ssize_t deu_enable_store(struct device *kdev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+ ssize_t ret;
+ u32 val; /* parsed with base 0, i.e. the base is auto-detected */
+
+ ret = kstrtou32(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ /* Only 0 (disable) and 1 (enable) are accepted */
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ i915->predictive_load_enable = val;
+
+ return count;
+}
+
static DEVICE_ATTR_RO(gt_act_freq_mhz);
static DEVICE_ATTR_RO(gt_cur_freq_mhz);
static DEVICE_ATTR_RW(gt_boost_freq_mhz);
static DEVICE_ATTR_RW(gt_max_freq_mhz);
static DEVICE_ATTR_RW(gt_min_freq_mhz);
+static DEVICE_ATTR_RW(deu_enable);
+
static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf);
@@ -474,6 +507,7 @@ static const struct attribute * const gen6_attrs[] = {
&dev_attr_gt_RP0_freq_mhz.attr,
&dev_attr_gt_RP1_freq_mhz.attr,
&dev_attr_gt_RPn_freq_mhz.attr,
+ &dev_attr_deu_enable.attr,
NULL,
};
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index db8496b4c38d..aeff8e9d52df 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -899,6 +899,34 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
}
+/* Cherryview {slice, subslice, EU} counts, indexed by enum gem_load_type */
+static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+ {1, 1, 4}, /* Low */
+ {1, 1, 6}, /* Medium */
+ {1, 2, 6} /* High */
+};
+
+/* GLK GT1 {slice, subslice, EU} counts, indexed by enum gem_load_type */
+static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = {
+ {1, 2, 2}, /* Low */
+ {1, 2, 3}, /* Medium */
+ {1, 2, 6} /* High */
+};
+
+/* KBL GT2 {slice, subslice, EU} counts, indexed by enum gem_load_type */
+static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+ {1, 3, 2}, /* Low */
+ {1, 3, 4}, /* Medium */
+ {1, 3, 8} /* High */
+};
+
+/* KBL GT3 {slice, subslice, EU} counts, indexed by enum gem_load_type */
+static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+ {2, 3, 4}, /* Low */
+ {2, 3, 6}, /* Medium */
+ {2, 3, 8} /* High */
+};
+
/**
* intel_device_info_runtime_init - initialize runtime info
* @dev_priv: the i915 device
@@ -1024,12 +1052,37 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
/* Initialize slice/subslice/EU info */
if (IS_HASWELL(dev_priv))
hsw_sseu_info_init(dev_priv);
- else if (IS_CHERRYVIEW(dev_priv))
+ else if (IS_CHERRYVIEW(dev_priv)) {
cherryview_sseu_info_init(dev_priv);
+ BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
+ dev_priv->opt_config = chv_config;
+ }
else if (IS_BROADWELL(dev_priv))
bdw_sseu_info_init(dev_priv);
- else if (IS_GEN(dev_priv, 9))
+ else if (IS_GEN(dev_priv, 9)) {
gen9_sseu_info_init(dev_priv);
+
+ // FIXME tursulin: are the glk/kbl tables supposed to apply to all Gen9?
+
+ switch (info->gt) {
+ default: /* fall through */
+ case 1:
+ BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config) !=
+ LOAD_TYPE_LAST);
+ dev_priv->opt_config = glk_gt1_config;
+ break;
+ case 2:
+ BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config) !=
+ LOAD_TYPE_LAST);
+ dev_priv->opt_config = kbl_gt2_config;
+ break;
+ case 3:
+ BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config) !=
+ LOAD_TYPE_LAST);
+ dev_priv->opt_config = kbl_gt3_config;
+ break;
+ }
+ }
else if (IS_GEN(dev_priv, 10))
gen10_sseu_info_init(dev_priv);
else if (IS_GEN(dev_priv, 11))
--
2.20.1
More information about the Intel-gfx-trybot
mailing list