[Intel-gfx] [PATCH 3/3] drm/i915/perf: Schedule oa_config after modifying the contexts
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Mon Mar 30 13:17:18 UTC 2020
On 27/03/2020 13:22, Chris Wilson wrote:
> We wish that the scheduler emit the context modification commands prior
> to enabling the oa_config, for which we must explicitly inform it of the
> ordering constraints. This is especially important as we now wait for
> the final oa_config setup to be completed and as this wait may be on a
> distinct context to the state modifications, we need that command packet
> to be always last in the queue.
>
> We borrow the i915_active for its ability to track multiple timelines
> and the last dma_fence on each; a flexible dma_resv. Keeping track of
> each dma_fence is important for us so that we can efficiently schedule
> the requests and reprioritise as required.
>
> Reported-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_perf.c | 154 ++++++++++++++++---------
> drivers/gpu/drm/i915/i915_perf_types.h | 5 +-
> 2 files changed, 102 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3222f6cd8255..faf4b0970775 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1961,10 +1961,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
> return i915_vma_get(oa_bo->vma);
> }
>
> -static struct i915_request *
> +static int
> emit_oa_config(struct i915_perf_stream *stream,
> struct i915_oa_config *oa_config,
> - struct intel_context *ce)
> + struct intel_context *ce,
> + struct i915_active *active)
> {
> struct i915_request *rq;
> struct i915_vma *vma;
> @@ -1972,7 +1973,7 @@ emit_oa_config(struct i915_perf_stream *stream,
>
> vma = get_oa_vma(stream, oa_config);
> if (IS_ERR(vma))
> - return ERR_CAST(vma);
> + return PTR_ERR(vma);
>
> err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> if (err)
> @@ -1986,6 +1987,18 @@ emit_oa_config(struct i915_perf_stream *stream,
> goto err_vma_unpin;
> }
>
> + if (!IS_ERR_OR_NULL(active)) {
> + /* After all individual context modifications */
> + err = i915_request_await_active(rq, active,
> + I915_ACTIVE_AWAIT_ALL);
> + if (err)
> + goto err_add_request;
> +
> + err = i915_active_add_request(active, rq);
> + if (err)
> + goto err_add_request;
> + }
> +
> i915_vma_lock(vma);
> err = i915_request_await_object(rq, vma->obj, 0);
> if (!err)
> @@ -2000,14 +2013,13 @@ emit_oa_config(struct i915_perf_stream *stream,
> if (err)
> goto err_add_request;
>
> - i915_request_get(rq);
> err_add_request:
> i915_request_add(rq);
> err_vma_unpin:
> i915_vma_unpin(vma);
> err_vma_put:
> i915_vma_put(vma);
> - return err ? ERR_PTR(err) : rq;
> + return err;
> }
>
> static struct intel_context *oa_context(struct i915_perf_stream *stream)
> @@ -2015,8 +2027,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream)
> return stream->pinned_ctx ?: stream->engine->kernel_context;
> }
>
> -static struct i915_request *
> -hsw_enable_metric_set(struct i915_perf_stream *stream)
> +static int
> +hsw_enable_metric_set(struct i915_perf_stream *stream,
> + struct i915_active *active)
> {
> struct intel_uncore *uncore = stream->uncore;
>
> @@ -2035,7 +2048,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
> intel_uncore_rmw(uncore, GEN6_UCGCTL1,
> 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>
> - return emit_oa_config(stream, stream->oa_config, oa_context(stream));
> + return emit_oa_config(stream,
> + stream->oa_config, oa_context(stream),
> + active);
> }
>
> static void hsw_disable_metric_set(struct i915_perf_stream *stream)
> @@ -2182,8 +2197,10 @@ static int gen8_modify_context(struct intel_context *ce,
> return err;
> }
>
> -static int gen8_modify_self(struct intel_context *ce,
> - const struct flex *flex, unsigned int count)
> +static int
> +gen8_modify_self(struct intel_context *ce,
> + const struct flex *flex, unsigned int count,
> + struct i915_active *active)
> {
> struct i915_request *rq;
> int err;
> @@ -2194,8 +2211,17 @@ static int gen8_modify_self(struct intel_context *ce,
> if (IS_ERR(rq))
> return PTR_ERR(rq);
>
> + if (!IS_ERR_OR_NULL(active)) {
> + err = i915_active_add_request(active, rq);
> + if (err)
> + goto err_add_request;
> + }
> +
> err = gen8_load_flex(rq, ce, flex, count);
> + if (err)
> + goto err_add_request;
>
> +err_add_request:
> i915_request_add(rq);
> return err;
> }
> @@ -2229,7 +2255,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
> return err;
> }
>
> -static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
> +static int gen12_configure_oar_context(struct i915_perf_stream *stream,
> + struct i915_active *active)
> {
> int err;
> struct intel_context *ce = stream->pinned_ctx;
> @@ -2238,7 +2265,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
> {
> GEN8_OACTXCONTROL,
> stream->perf->ctx_oactxctrl_offset + 1,
> - enable ? GEN8_OA_COUNTER_RESUME : 0,
> + active ? GEN8_OA_COUNTER_RESUME : 0,
> },
> };
> /* Offsets in regs_lri are not used since this configuration is only
> @@ -2250,13 +2277,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
> GEN12_OAR_OACONTROL,
> GEN12_OAR_OACONTROL_OFFSET + 1,
> (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> - (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
> + (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
> },
> {
> RING_CONTEXT_CONTROL(ce->engine->mmio_base),
> CTX_CONTEXT_CONTROL,
> _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> - enable ?
> + active ?
> GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
> 0)
> },
> @@ -2273,7 +2300,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
> return err;
>
> /* Apply regs_lri using LRI with pinned context */
> - return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
> + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
> }
>
> /*
> @@ -2301,9 +2328,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
> * Note: it's only the RCS/Render context that has any OA state.
> * Note: the first flex register passed must always be R_PWR_CLK_STATE
> */
> -static int oa_configure_all_contexts(struct i915_perf_stream *stream,
> - struct flex *regs,
> - size_t num_regs)
> +static int
> +oa_configure_all_contexts(struct i915_perf_stream *stream,
> + struct flex *regs,
> + size_t num_regs,
> + struct i915_active *active)
> {
> struct drm_i915_private *i915 = stream->perf->i915;
> struct intel_engine_cs *engine;
> @@ -2360,7 +2389,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
>
> regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>
> - err = gen8_modify_self(ce, regs, num_regs);
> + err = gen8_modify_self(ce, regs, num_regs, active);
> if (err)
> return err;
> }
> @@ -2368,8 +2397,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
> return 0;
> }
>
> -static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
> - const struct i915_oa_config *oa_config)
> +static int
> +gen12_configure_all_contexts(struct i915_perf_stream *stream,
> + const struct i915_oa_config *oa_config,
> + struct i915_active *active)
> {
> struct flex regs[] = {
> {
> @@ -2378,11 +2409,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
> },
> };
>
> - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> + return oa_configure_all_contexts(stream,
> + regs, ARRAY_SIZE(regs),
> + active);
> }
>
> -static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> - const struct i915_oa_config *oa_config)
> +static int
> +lrc_configure_all_contexts(struct i915_perf_stream *stream,
> + const struct i915_oa_config *oa_config,
> + struct i915_active *active)
> {
> /* The MMIO offsets for Flex EU registers aren't contiguous */
> const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> @@ -2415,11 +2450,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> for (i = 2; i < ARRAY_SIZE(regs); i++)
> regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>
> - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> + return oa_configure_all_contexts(stream,
> + regs, ARRAY_SIZE(regs),
> + active);
> }
>
> -static struct i915_request *
> -gen8_enable_metric_set(struct i915_perf_stream *stream)
> +static int
> +gen8_enable_metric_set(struct i915_perf_stream *stream,
> + struct i915_active *active)
> {
> struct intel_uncore *uncore = stream->uncore;
> struct i915_oa_config *oa_config = stream->oa_config;
> @@ -2459,11 +2497,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
> * to make sure all slices/subslices are ON before writing to NOA
> * registers.
> */
> - ret = lrc_configure_all_contexts(stream, oa_config);
> + ret = lrc_configure_all_contexts(stream, oa_config, active);
> if (ret)
> - return ERR_PTR(ret);
> + return ret;
>
> - return emit_oa_config(stream, oa_config, oa_context(stream));
> + return emit_oa_config(stream,
> + stream->oa_config, oa_context(stream),
> + active);
> }
>
> static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
> @@ -2473,8 +2513,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
> 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
> }
>
> -static struct i915_request *
> -gen12_enable_metric_set(struct i915_perf_stream *stream)
> +static int
> +gen12_enable_metric_set(struct i915_perf_stream *stream,
> + struct i915_active *active)
> {
> struct intel_uncore *uncore = stream->uncore;
> struct i915_oa_config *oa_config = stream->oa_config;
> @@ -2503,9 +2544,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
> * to make sure all slices/subslices are ON before writing to NOA
> * registers.
> */
> - ret = gen12_configure_all_contexts(stream, oa_config);
> + ret = gen12_configure_all_contexts(stream, oa_config, active);
> if (ret)
> - return ERR_PTR(ret);
> + return ret;
>
> /*
> * For Gen12, performance counters are context
> @@ -2513,12 +2554,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
> * requested this.
> */
> if (stream->ctx) {
> - ret = gen12_configure_oar_context(stream, true);
> + ret = gen12_configure_oar_context(stream, active);
> if (ret)
> - return ERR_PTR(ret);
> + return ret;
> }
>
> - return emit_oa_config(stream, oa_config, oa_context(stream));
> + return emit_oa_config(stream,
> + stream->oa_config, oa_context(stream),
> + active);
> }
>
> static void gen8_disable_metric_set(struct i915_perf_stream *stream)
> @@ -2526,7 +2569,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream)
> struct intel_uncore *uncore = stream->uncore;
>
> /* Reset all contexts' slices/subslices configurations. */
> - lrc_configure_all_contexts(stream, NULL);
> + lrc_configure_all_contexts(stream, NULL, NULL);
>
> intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
> }
> @@ -2536,7 +2579,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream)
> struct intel_uncore *uncore = stream->uncore;
>
> /* Reset all contexts' slices/subslices configurations. */
> - lrc_configure_all_contexts(stream, NULL);
> + lrc_configure_all_contexts(stream, NULL, NULL);
>
> /* Make sure we disable noa to save power. */
> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> @@ -2547,11 +2590,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
> struct intel_uncore *uncore = stream->uncore;
>
> /* Reset all contexts' slices/subslices configurations. */
> - gen12_configure_all_contexts(stream, NULL);
> + gen12_configure_all_contexts(stream, NULL, NULL);
>
> /* disable the context save/restore or OAR counters */
> if (stream->ctx)
> - gen12_configure_oar_context(stream, false);
> + gen12_configure_oar_context(stream, NULL);
>
> /* Make sure we disable noa to save power. */
> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> @@ -2723,16 +2766,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
>
> static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
> {
> - struct i915_request *rq;
> + struct i915_active *active;
> + int err;
>
> - rq = stream->perf->ops.enable_metric_set(stream);
> - if (IS_ERR(rq))
> - return PTR_ERR(rq);
> + active = i915_active_create();
> + if (!active)
> + return -ENOMEM;
>
> - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> - i915_request_put(rq);
> + err = stream->perf->ops.enable_metric_set(stream, active);
> + if (err == 0)
> + __i915_active_wait(active, TASK_UNINTERRUPTIBLE);
>
> - return 0;
> + i915_active_put(active);
> + return err;
> }
>
> static void
> @@ -3217,7 +3263,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
> return -EINVAL;
>
> if (config != stream->oa_config) {
> - struct i915_request *rq;
> + int err;
>
> /*
> * If OA is bound to a specific context, emit the
> @@ -3228,13 +3274,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
> * When set globally, we use a low priority kernel context,
> * so it will effectively take effect when idle.
> */
> - rq = emit_oa_config(stream, config, oa_context(stream));
> - if (!IS_ERR(rq)) {
> + err = emit_oa_config(stream, config, oa_context(stream), NULL);
> + if (!err)
> config = xchg(&stream->oa_config, config);
> - i915_request_put(rq);
> - } else {
> - ret = PTR_ERR(rq);
> - }
> + else
> + ret = err;
> }
>
> i915_oa_config_put(config);
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index 32289cbda648..de5cbb40fddf 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -22,6 +22,7 @@
>
> struct drm_i915_private;
> struct file;
> +struct i915_active;
> struct i915_gem_context;
> struct i915_perf;
> struct i915_vma;
> @@ -340,8 +341,8 @@ struct i915_oa_ops {
> * counter reports being sampled. May apply system constraints such as
> * disabling EU clock gating as required.
> */
> - struct i915_request *
> - (*enable_metric_set)(struct i915_perf_stream *stream);
> + int (*enable_metric_set)(struct i915_perf_stream *stream,
> + struct i915_active *active);
>
> /**
> * @disable_metric_set: Remove system constraints associated with using
More information about the Intel-gfx
mailing list