[Intel-gfx] [PATCH 2/2] drm/i915/perf: Configure OAR for specific context
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Mon Nov 11 22:09:48 UTC 2019
Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
drivers/gpu/drm/i915/i915_perf.c | 203 ++++++++++++++++++-------------
1 file changed, 118 insertions(+), 85 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b922000e4b9b..63633e73a695 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2047,6 +2047,18 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
return 0;
}
+
+static void
+gen12_update_reg_state_unlocked(const struct intel_context *ce,
+ const struct i915_perf_stream *stream)
+{
+ u32 *reg_state = ce->lrc_reg_state;
+
+ /* Use a stable power state configuration */
+ reg_state[CTX_R_PWR_CLK_STATE] =
+ intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
+}
+
/*
* NB: It must always remain pointer safe to run this even if the OA unit
* has been disabled.
@@ -2073,20 +2085,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
u32 *reg_state = ce->lrc_reg_state;
int i;
- if (IS_GEN(stream->perf->i915, 12)) {
- u32 format = stream->oa_buffer.format;
+ reg_state[ctx_oactxctrl + 1] =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
- reg_state[ctx_oactxctrl + 1] =
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
- (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
- } else {
- reg_state[ctx_oactxctrl + 1] =
- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME;
- }
-
- for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
+ for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
@@ -2219,34 +2223,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
return err;
}
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
{
- struct i915_request *rq;
- u32 *cs;
- int err = 0;
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
+ int err;
+ struct intel_context *ce = stream->pinned_ctx;
+ struct flex regs[] = {
+ {
+ GEN12_OAR_OACONTROL,
+ stream->perf->ctx_oactxctrl_offset + 1,
+ },
+ {
+ RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+ CTX_CONTEXT_CONTROL,
+ },
+ };
+ u32 format = stream->oa_buffer.format;
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto out;
- }
+ regs[0].value =
+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
- *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
- *cs++ = MI_NOOP;
+ /* This value is only good for LRI and not for the context image. */
+ regs[1].value = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+ enable ?
+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+ 0);
- intel_ring_advance(rq, cs);
+ err = intel_context_lock_pinned(ce);
+ if (err)
+ return err;
-out:
- i915_request_add(rq);
+ /* Modify the context image of pinned context.
+ *
+ * We will not modify the CTX CONTEXT CONTROL here as an LRI is
+ * sufficient. OAR_OACONTROL needs to be modified in the context
+ * image as well as with an LRI.
+ */
+ err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs) - 1);
+ intel_context_unlock_pinned(ce);
+ if (err)
+ return err;
- return err;
+ /* Use LRI to modify the MMIOs using pinned context */
+ return gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
}
/*
@@ -2272,53 +2291,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
* per-context OA state.
*
* Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
*/
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+ struct flex *regs,
+ size_t num_regs)
{
struct drm_i915_private *i915 = stream->perf->i915;
- /* The MMIO offsets for Flex EU registers aren't contiguous */
- const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
- struct flex regs[] = {
- {
- GEN8_R_PWR_CLK_STATE,
- CTX_R_PWR_CLK_STATE,
- },
- {
- IS_GEN(i915, 12) ?
- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
- },
- { EU_PERF_CNTL0, ctx_flexeuN(0) },
- { EU_PERF_CNTL1, ctx_flexeuN(1) },
- { EU_PERF_CNTL2, ctx_flexeuN(2) },
- { EU_PERF_CNTL3, ctx_flexeuN(3) },
- { EU_PERF_CNTL4, ctx_flexeuN(4) },
- { EU_PERF_CNTL5, ctx_flexeuN(5) },
- { EU_PERF_CNTL6, ctx_flexeuN(6) },
- };
-#undef ctx_flexeuN
struct intel_engine_cs *engine;
struct i915_gem_context *ctx, *cn;
- size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
- int i, err;
-
- if (IS_GEN(i915, 12)) {
- u32 format = stream->oa_buffer.format;
-
- regs[1].value =
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
- } else {
- regs[1].value =
- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME;
- }
-
- for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
- regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+ int err;
lockdep_assert_held(&stream->perf->lock);
@@ -2348,7 +2330,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, array_size);
+ err = gen8_configure_context(ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2373,7 +2355,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
- err = gen8_modify_self(ce, regs, array_size);
+ err = gen8_modify_self(ce, regs, num_regs);
if (err)
return err;
}
@@ -2381,6 +2363,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
return 0;
}
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config)
+{
+ struct flex regs[] = {
+ {
+ GEN8_R_PWR_CLK_STATE,
+ CTX_R_PWR_CLK_STATE,
+ },
+ };
+
+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config)
+{
+ /* The MMIO offsets for Flex EU registers aren't contiguous */
+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+ struct flex regs[] = {
+ {
+ GEN8_R_PWR_CLK_STATE,
+ CTX_R_PWR_CLK_STATE,
+ },
+ {
+ GEN8_OACTXCONTROL,
+ stream->perf->ctx_oactxctrl_offset + 1,
+ },
+ { EU_PERF_CNTL0, ctx_flexeuN(0) },
+ { EU_PERF_CNTL1, ctx_flexeuN(1) },
+ { EU_PERF_CNTL2, ctx_flexeuN(2) },
+ { EU_PERF_CNTL3, ctx_flexeuN(3) },
+ { EU_PERF_CNTL4, ctx_flexeuN(4) },
+ { EU_PERF_CNTL5, ctx_flexeuN(5) },
+ { EU_PERF_CNTL6, ctx_flexeuN(6) },
+ };
+#undef ctx_flexeuN
+ int i;
+
+ regs[1].value =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
+
+ for (i = 2; i < ARRAY_SIZE(regs); i++)
+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
@@ -2464,7 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = lrc_configure_all_contexts(stream, oa_config);
+ ret = gen12_configure_all_contexts(stream, oa_config);
if (ret)
return ret;
@@ -2474,8 +2506,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
* requested this.
*/
if (stream->ctx) {
- ret = gen12_emit_oar_config(stream->pinned_ctx,
- oa_config != NULL);
+ ret = gen12_configure_oar_context(stream, oa_config != NULL);
if (ret)
return ret;
}
@@ -2509,11 +2540,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- lrc_configure_all_contexts(stream, NULL);
+ gen12_configure_all_contexts(stream, NULL);
/* disable the context save/restore or OAR counters */
if (stream->ctx)
- gen12_emit_oar_config(stream->pinned_ctx, false);
+ gen12_configure_oar_context(stream, false);
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2856,7 +2887,9 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
stream = engine->i915->perf.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, stream);
+ IS_GEN(stream->perf->i915, 12) ?
+ gen12_update_reg_state_unlocked(ce, stream) :
+ gen8_update_reg_state_unlocked(ce, stream);
}
/**
--
2.20.1
More information about the Intel-gfx
mailing list