[Intel-gfx] [PATCH 2/2] drm/i915/perf: Configure OAR for specific context
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Mon Nov 18 18:01:25 UTC 2019
On Mon, Nov 18, 2019 at 03:42:28PM +0200, Lionel Landwerlin wrote:
>On 14/11/2019 21:21, Umesh Nerlige Ramappa wrote:
>>Gen12 supports saving/restoring render counters per context. Apply OAR
>>configuration only for the context that is passed in to perf.
>>
>>v2:
>>- Fix OACTXCONTROL value to only stop/resume counters.
>>- Remove gen12_update_reg_state_unlocked as power state is already
>> applied by the caller.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>---
>> drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++--------------
>> 1 file changed, 108 insertions(+), 85 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 221c1090ae93..2f0be5fbef4b 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
>> u32 *reg_state = ce->lrc_reg_state;
>> int i;
>>- if (IS_GEN(stream->perf->i915, 12)) {
>>- u32 format = stream->oa_buffer.format;
>>+ reg_state[ctx_oactxctrl + 1] =
>>+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>+ GEN8_OA_COUNTER_RESUME;
>>- reg_state[ctx_oactxctrl + 1] =
>>- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>- (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>- } else {
>>- reg_state[ctx_oactxctrl + 1] =
>>- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>- GEN8_OA_COUNTER_RESUME;
>>- }
>>-
>>- for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
>>+ for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
>> reg_state[ctx_flexeu0 + i * 2 + 1] =
>> oa_config_flex_reg(stream->oa_config, flex_regs[i]);
>>@@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
>> return err;
>> }
>>-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>>+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
>> {
>>- struct i915_request *rq;
>>- u32 *cs;
>>- int err = 0;
>>-
>>- rq = i915_request_create(ce);
>>- if (IS_ERR(rq))
>>- return PTR_ERR(rq);
>>+ int err;
>>+ struct intel_context *ce = stream->pinned_ctx;
>>+ struct flex regs_context[] = {
>>+ {
>>+ GEN8_OACTXCONTROL,
>>+ stream->perf->ctx_oactxctrl_offset + 1,
>>+ enable ? GEN8_OA_COUNTER_RESUME : 0,
>>+ },
>>+ };
>
>
>When do we configure the Flex registers?
I don't see flex registers in the context image dump or in the spec, so
I guess we would only configure them if they are part of the metric set.
Will post a new version with below comments.
Thanks,
Umesh
>
>
>>+ struct flex regs_lri[] = {
>>+ {
>>+ GEN12_OAR_OACONTROL,
>>+ },
>>+ {
>>+ RING_CONTEXT_CONTROL(ce->engine->mmio_base),
>>+ },
>>+ };
>>+ u32 format = stream->oa_buffer.format;
>>- cs = intel_ring_begin(rq, 4);
>>- if (IS_ERR(cs)) {
>>- err = PTR_ERR(cs);
>>- goto out;
>>- }
>>+ regs_lri[0].value =
>>+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>- *cs++ = MI_LOAD_REGISTER_IMM(1);
>>- *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>>- *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>>- *cs++ = MI_NOOP;
>>+ regs_lri[1].value =
>>+ _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>+ enable ?
>>+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
>>+ 0);
>
>
>Can't we put those values in the array above?
>
>
>>- intel_ring_advance(rq, cs);
>>+ /* Modify the context image of pinned context with regs_context*/
>>+ err = intel_context_lock_pinned(ce);
>>+ if (err)
>>+ return err;
>>-out:
>>- i915_request_add(rq);
>>+ err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
>>+ intel_context_unlock_pinned(ce);
>>+ if (err)
>>+ return err;
>>- return err;
>>+ /* Apply regs_lri using LRI with pinned context */
>>+ return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
>> }
>> /*
>>@@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>> * per-context OA state.
>> *
>> * Note: it's only the RCS/Render context that has any OA state.
>>+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
>> */
>>-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>- const struct i915_oa_config *oa_config)
>>+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
>>+ struct flex *regs,
>>+ size_t num_regs)
>> {
>> struct drm_i915_private *i915 = stream->perf->i915;
>>- /* The MMIO offsets for Flex EU registers aren't contiguous */
>>- const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>- struct flex regs[] = {
>>- {
>>- GEN8_R_PWR_CLK_STATE,
>>- CTX_R_PWR_CLK_STATE,
>>- },
>>- {
>>- IS_GEN(i915, 12) ?
>>- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
>>- stream->perf->ctx_oactxctrl_offset + 1,
>>- },
>>- { EU_PERF_CNTL0, ctx_flexeuN(0) },
>>- { EU_PERF_CNTL1, ctx_flexeuN(1) },
>>- { EU_PERF_CNTL2, ctx_flexeuN(2) },
>>- { EU_PERF_CNTL3, ctx_flexeuN(3) },
>>- { EU_PERF_CNTL4, ctx_flexeuN(4) },
>>- { EU_PERF_CNTL5, ctx_flexeuN(5) },
>>- { EU_PERF_CNTL6, ctx_flexeuN(6) },
>>- };
>>-#undef ctx_flexeuN
>> struct intel_engine_cs *engine;
>> struct i915_gem_context *ctx, *cn;
>>- size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
>>- int i, err;
>>-
>>- if (IS_GEN(i915, 12)) {
>>- u32 format = stream->oa_buffer.format;
>>-
>>- regs[1].value =
>>- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>- } else {
>>- regs[1].value =
>>- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>- GEN8_OA_COUNTER_RESUME;
>>- }
>>-
>>- for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
>>- regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>>+ int err;
>> lockdep_assert_held(&stream->perf->lock);
>>@@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>> spin_unlock(&i915->gem.contexts.lock);
>>- err = gen8_configure_context(ctx, regs, array_size);
>>+ err = gen8_configure_context(ctx, regs, num_regs);
>> if (err) {
>> i915_gem_context_put(ctx);
>> return err;
>>@@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>> regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>>- err = gen8_modify_self(ce, regs, array_size);
>>+ err = gen8_modify_self(ce, regs, num_regs);
>> if (err)
>> return err;
>> }
>>@@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>> return 0;
>> }
>>+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
>>+ const struct i915_oa_config *oa_config)
>>+{
>>+ struct flex regs[] = {
>>+ {
>>+ GEN8_R_PWR_CLK_STATE,
>>+ CTX_R_PWR_CLK_STATE,
>>+ },
>>+ };
>>+
>>+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>>+}
>>+
>>+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>+ const struct i915_oa_config *oa_config)
>>+{
>>+ /* The MMIO offsets for Flex EU registers aren't contiguous */
>>+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>+ struct flex regs[] = {
>>+ {
>>+ GEN8_R_PWR_CLK_STATE,
>>+ CTX_R_PWR_CLK_STATE,
>>+ },
>>+ {
>>+ GEN8_OACTXCONTROL,
>>+ stream->perf->ctx_oactxctrl_offset + 1,
>>+ },
>>+ { EU_PERF_CNTL0, ctx_flexeuN(0) },
>>+ { EU_PERF_CNTL1, ctx_flexeuN(1) },
>>+ { EU_PERF_CNTL2, ctx_flexeuN(2) },
>>+ { EU_PERF_CNTL3, ctx_flexeuN(3) },
>>+ { EU_PERF_CNTL4, ctx_flexeuN(4) },
>>+ { EU_PERF_CNTL5, ctx_flexeuN(5) },
>>+ { EU_PERF_CNTL6, ctx_flexeuN(6) },
>>+ };
>>+#undef ctx_flexeuN
>>+ int i;
>>+
>>+ regs[1].value =
>>+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>+ GEN8_OA_COUNTER_RESUME;
>>+
>>+ for (i = 2; i < ARRAY_SIZE(regs); i++)
>>+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>>+
>>+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>>+}
>>+
>> static int gen8_enable_metric_set(struct i915_perf_stream *stream)
>> {
>> struct intel_uncore *uncore = stream->uncore;
>>@@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>> * to make sure all slices/subslices are ON before writing to NOA
>> * registers.
>> */
>>- ret = lrc_configure_all_contexts(stream, oa_config);
>>+ ret = gen12_configure_all_contexts(stream, oa_config);
>> if (ret)
>> return ret;
>>@@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>> * requested this.
>> */
>> if (stream->ctx) {
>>- ret = gen12_emit_oar_config(stream->pinned_ctx,
>>- oa_config != NULL);
>>+ ret = gen12_configure_oar_context(stream, oa_config != NULL);
>
>
>I think you can assume oa_config is going to be != NULL in the
>enable_metric_set vfunc.
>
>
>> if (ret)
>> return ret;
>> }
>>@@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>> struct intel_uncore *uncore = stream->uncore;
>> /* Reset all contexts' slices/subslices configurations. */
>>- lrc_configure_all_contexts(stream, NULL);
>>+ gen12_configure_all_contexts(stream, NULL);
>> /* disable the context save/restore or OAR counters */
>> if (stream->ctx)
>>- gen12_emit_oar_config(stream->pinned_ctx, false);
>>+ gen12_configure_oar_context(stream, false);
>> /* Make sure we disable noa to save power. */
>> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>>@@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>> return;
>> stream = engine->i915->perf.exclusive_stream;
>>- if (stream)
>>+ /*
>>+ * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
>>+ * is already doing that, so nothing to be done for gen12 here.
>>+ */
>>+ if (stream && INTEL_GEN(stream->perf->i915) < 12)
>> gen8_update_reg_state_unlocked(ce, stream);
>> }
>
>
More information about the Intel-gfx
mailing list