[Intel-gfx] [PATCH v3 2/2] drm/i915/perf: Configure OAR for specific context
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Dec 6 13:22:02 UTC 2019
On 19/11/2019 00:24, Umesh Nerlige Ramappa wrote:
> Gen12 supports saving/restoring render counters per context. Apply OAR
> configuration only for the context that is passed in to perf.
>
> v2:
> - Fix OACTXCONTROL value to only stop/resume counters.
> - Remove gen12_update_reg_state_unlocked as power state is already
> applied by the caller.
>
> v3: (Lionel)
> - Move register initialization into the array
> - Assume a valid oa_config in enable_metric_set
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Looks all good, thanks!
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_perf.c | 199 +++++++++++++++++--------------
> 1 file changed, 112 insertions(+), 87 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index ca58502a67d8..ea911961fd8c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2080,20 +2080,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
> u32 *reg_state = ce->lrc_reg_state;
> int i;
>
> - if (IS_GEN(stream->perf->i915, 12)) {
> - u32 format = stream->oa_buffer.format;
> + reg_state[ctx_oactxctrl + 1] =
> + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> + GEN8_OA_COUNTER_RESUME;
>
> - reg_state[ctx_oactxctrl + 1] =
> - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> - (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
> - } else {
> - reg_state[ctx_oactxctrl + 1] =
> - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> - GEN8_OA_COUNTER_RESUME;
> - }
> -
> - for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
> + for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
> reg_state[ctx_flexeu0 + i * 2 + 1] =
> oa_config_flex_reg(stream->oa_config, flex_regs[i]);
>
> @@ -2226,34 +2218,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
> return err;
> }
>
> -static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
> +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
> {
> - struct i915_request *rq;
> - u32 *cs;
> - int err = 0;
> -
> - rq = i915_request_create(ce);
> - if (IS_ERR(rq))
> - return PTR_ERR(rq);
> -
> - cs = intel_ring_begin(rq, 4);
> - if (IS_ERR(cs)) {
> - err = PTR_ERR(cs);
> - goto out;
> - }
> -
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
> - *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> - enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
> - *cs++ = MI_NOOP;
> + int err;
> + struct intel_context *ce = stream->pinned_ctx;
> + u32 format = stream->oa_buffer.format;
> + struct flex regs_context[] = {
> + {
> + GEN8_OACTXCONTROL,
> + stream->perf->ctx_oactxctrl_offset + 1,
> + enable ? GEN8_OA_COUNTER_RESUME : 0,
> + },
> + };
> + /* Offsets in regs_lri are not used since this configuration is only
> + * applied using LRI. Initialize the correct offsets for posterity.
> + */
> +#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
> + struct flex regs_lri[] = {
> + {
> + GEN12_OAR_OACONTROL,
> + GEN12_OAR_OACONTROL_OFFSET + 1,
> + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> + (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
> + },
> + {
> + RING_CONTEXT_CONTROL(ce->engine->mmio_base),
> + CTX_CONTEXT_CONTROL,
> + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> + enable ?
> + GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
> + 0)
> + },
> + };
>
> - intel_ring_advance(rq, cs);
> + /* Modify the context image of pinned context with regs_context*/
> + err = intel_context_lock_pinned(ce);
> + if (err)
> + return err;
>
> -out:
> - i915_request_add(rq);
> + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
> + intel_context_unlock_pinned(ce);
> + if (err)
> + return err;
>
> - return err;
> + /* Apply regs_lri using LRI with pinned context */
> + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
> }
>
> /*
> @@ -2279,53 +2288,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
> * per-context OA state.
> *
> * Note: it's only the RCS/Render context that has any OA state.
> + * Note: the first flex register passed must always be R_PWR_CLK_STATE
> */
> -static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> - const struct i915_oa_config *oa_config)
> +static int oa_configure_all_contexts(struct i915_perf_stream *stream,
> + struct flex *regs,
> + size_t num_regs)
> {
> struct drm_i915_private *i915 = stream->perf->i915;
> - /* The MMIO offsets for Flex EU registers aren't contiguous */
> - const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
> - struct flex regs[] = {
> - {
> - GEN8_R_PWR_CLK_STATE,
> - CTX_R_PWR_CLK_STATE,
> - },
> - {
> - IS_GEN(i915, 12) ?
> - GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
> - stream->perf->ctx_oactxctrl_offset + 1,
> - },
> - { EU_PERF_CNTL0, ctx_flexeuN(0) },
> - { EU_PERF_CNTL1, ctx_flexeuN(1) },
> - { EU_PERF_CNTL2, ctx_flexeuN(2) },
> - { EU_PERF_CNTL3, ctx_flexeuN(3) },
> - { EU_PERF_CNTL4, ctx_flexeuN(4) },
> - { EU_PERF_CNTL5, ctx_flexeuN(5) },
> - { EU_PERF_CNTL6, ctx_flexeuN(6) },
> - };
> -#undef ctx_flexeuN
> struct intel_engine_cs *engine;
> struct i915_gem_context *ctx, *cn;
> - size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
> - int i, err;
> -
> - if (IS_GEN(i915, 12)) {
> - u32 format = stream->oa_buffer.format;
> -
> - regs[1].value =
> - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> - (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
> - } else {
> - regs[1].value =
> - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> - GEN8_OA_COUNTER_RESUME;
> - }
> -
> - for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
> - regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
> + int err;
>
> lockdep_assert_held(&stream->perf->lock);
>
> @@ -2355,7 +2327,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>
> spin_unlock(&i915->gem.contexts.lock);
>
> - err = gen8_configure_context(ctx, regs, array_size);
> + err = gen8_configure_context(ctx, regs, num_regs);
> if (err) {
> i915_gem_context_put(ctx);
> return err;
> @@ -2380,7 +2352,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>
> regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>
> - err = gen8_modify_self(ce, regs, array_size);
> + err = gen8_modify_self(ce, regs, num_regs);
> if (err)
> return err;
> }
> @@ -2388,6 +2360,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> return 0;
> }
>
> +static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
> + const struct i915_oa_config *oa_config)
> +{
> + struct flex regs[] = {
> + {
> + GEN8_R_PWR_CLK_STATE,
> + CTX_R_PWR_CLK_STATE,
> + },
> + };
> +
> + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> +}
> +
> +static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> + const struct i915_oa_config *oa_config)
> +{
> + /* The MMIO offsets for Flex EU registers aren't contiguous */
> + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
> + struct flex regs[] = {
> + {
> + GEN8_R_PWR_CLK_STATE,
> + CTX_R_PWR_CLK_STATE,
> + },
> + {
> + GEN8_OACTXCONTROL,
> + stream->perf->ctx_oactxctrl_offset + 1,
> + },
> + { EU_PERF_CNTL0, ctx_flexeuN(0) },
> + { EU_PERF_CNTL1, ctx_flexeuN(1) },
> + { EU_PERF_CNTL2, ctx_flexeuN(2) },
> + { EU_PERF_CNTL3, ctx_flexeuN(3) },
> + { EU_PERF_CNTL4, ctx_flexeuN(4) },
> + { EU_PERF_CNTL5, ctx_flexeuN(5) },
> + { EU_PERF_CNTL6, ctx_flexeuN(6) },
> + };
> +#undef ctx_flexeuN
> + int i;
> +
> + regs[1].value =
> + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> + GEN8_OA_COUNTER_RESUME;
> +
> + for (i = 2; i < ARRAY_SIZE(regs); i++)
> + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
> +
> + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> +}
> +
> static int gen8_enable_metric_set(struct i915_perf_stream *stream)
> {
> struct intel_uncore *uncore = stream->uncore;
> @@ -2471,7 +2493,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
> * to make sure all slices/subslices are ON before writing to NOA
> * registers.
> */
> - ret = lrc_configure_all_contexts(stream, oa_config);
> + ret = gen12_configure_all_contexts(stream, oa_config);
> if (ret)
> return ret;
>
> @@ -2481,8 +2503,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
> * requested this.
> */
> if (stream->ctx) {
> - ret = gen12_emit_oar_config(stream->pinned_ctx,
> - oa_config != NULL);
> + ret = gen12_configure_oar_context(stream, true);
> if (ret)
> return ret;
> }
> @@ -2516,11 +2537,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
> struct intel_uncore *uncore = stream->uncore;
>
> /* Reset all contexts' slices/subslices configurations. */
> - lrc_configure_all_contexts(stream, NULL);
> + gen12_configure_all_contexts(stream, NULL);
>
> /* disable the context save/restore or OAR counters */
> if (stream->ctx)
> - gen12_emit_oar_config(stream->pinned_ctx, false);
> + gen12_configure_oar_context(stream, false);
>
> /* Make sure we disable noa to save power. */
> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> @@ -2862,7 +2883,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> return;
>
> stream = engine->i915->perf.exclusive_stream;
> - if (stream)
> + /*
> + * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
> + * is already doing that, so nothing to be done for gen12 here.
> + */
> + if (stream && INTEL_GEN(stream->perf->i915) < 12)
> gen8_update_reg_state_unlocked(ce, stream);
> }
>
More information about the Intel-gfx
mailing list