[Intel-gfx] [PATCH 1/2] drm/i915/oa: Reconfigure contexts on the fly
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 8 12:10:34 UTC 2019
On 08/07/2019 12:19, Chris Wilson wrote:
> Avoid a global idle barrier by reconfiguring each context by rewriting
> them with MI_STORE_DWORD from the kernel context.
>
> v2: We only need to determine the desired register values once, they are
> the same for all contexts.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +
> drivers/gpu/drm/i915/gt/intel_lrc.c | 7 +-
> drivers/gpu/drm/i915/i915_perf.c | 248 +++++++++++++++-----
> 3 files changed, 195 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index e367dce2a696..1f0d10bb88c1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -624,7 +624,9 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
> ctx->sched.priority = I915_USER_PRIORITY(prio);
> ctx->ring_size = PAGE_SIZE;
>
> + /* Isolate the kernel context from prying eyes and sticky fingers */
> GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
> + list_del_init(&ctx->link);
Why exactly is the list_del_init needed here?
Any repercussions for i915_sysfs/i915_l3_write? I gather you won't
care about debugfs?
Should adding to i915->contexts.list be done from gem_context_register?
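
Something along these lines is what I had in mind, i.e. only publish a
context to i915->contexts.list at registration time so the kernel
context never needs fishing back out with list_del_init(). Only a
sketch: the helper name is made up and I have not double-checked where
the list_add currently lives, but the list looks to be guarded by
struct_mutex judging by the lockdep assert further down in
gen8_configure_all_contexts().

/* Sketch only: hypothetical helper, called from gem_context_register(). */
static void gem_context_publish(struct drm_i915_private *i915,
				struct i915_gem_context *ctx)
{
	/* User contexts only; kernel contexts stay off the list. */
	lockdep_assert_held(&i915->drm.struct_mutex);

	list_add_tail(&ctx->link, &i915->contexts.list);
}
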
What remains that is not a kernel context but is still on the list?
Won't the preempt context be missed by the re-configuration?
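
If it should not be missed, I would expect the final loop to need a
sibling walking engine->preempt_context as well, something along these
lines (untested sketch reusing your helpers, and assuming
engine->preempt_context is still around; I have not checked whether its
image actually carries any OA state):

/* Sketch: also refresh the preempt context image on reconfiguration. */
for_each_engine(engine, i915, id) {
	struct intel_context *ce = engine->preempt_context;

	if (!ce || engine->class != RENDER_CLASS)
		continue;

	err = intel_context_lock_pinned(ce);
	if (err)
		return err;

	regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);

	/* If not yet pinned, the OA state is applied on first pin instead. */
	if (intel_context_is_pinned(ce))
		err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));

	intel_context_unlock_pinned(ce);
	if (err)
		return err;
}
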
Regards,
Tvrtko
>
> return ctx;
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e1ae1399c72b..9cc5374401e1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1552,9 +1552,12 @@ __execlists_update_reg_state(struct intel_context *ce,
> regs[CTX_RING_TAIL + 1] = ring->tail;
>
> /* RPCS */
> - if (engine->class == RENDER_CLASS)
> + if (engine->class == RENDER_CLASS) {
> regs[CTX_R_PWR_CLK_STATE + 1] =
> intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> +
> + i915_oa_init_reg_state(engine, ce, regs);
> + }
> }
>
> static int
> @@ -2966,8 +2969,6 @@ static void execlists_init_reg_state(u32 *regs,
> if (rcs) {
> regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> -
> - i915_oa_init_reg_state(engine, ce, regs);
> }
>
> regs[CTX_END] = MI_BATCH_BUFFER_END;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 357e63beb373..25c34a0b521e 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1630,6 +1630,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
> ~GT_NOA_ENABLE));
> }
>
> +static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
> + i915_reg_t reg)
> +{
> + u32 mmio = i915_mmio_reg_offset(reg);
> + int i;
> +
> + /*
> + * This arbitrary default will select the 'EU FPU0 Pipeline
> + * Active' event. In the future it's anticipated that there
> + * will be an explicit 'No Event' we can select, but not yet...
> + */
> + if (!oa_config)
> + return 0;
> +
> + for (i = 0; i < oa_config->flex_regs_len; i++) {
> + if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
> + return oa_config->flex_regs[i].value;
> + }
> +
> + return 0;
> +}
> /*
> * NB: It must always remain pointer safe to run this even if the OA unit
> * has been disabled.
> @@ -1663,28 +1684,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> GEN8_OA_COUNTER_RESUME);
>
> for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
> - u32 state_offset = ctx_flexeu0 + i * 2;
> - u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
> -
> - /*
> - * This arbitrary default will select the 'EU FPU0 Pipeline
> - * Active' event. In the future it's anticipated that there
> - * will be an explicit 'No Event' we can select, but not yet...
> - */
> - u32 value = 0;
> -
> - if (oa_config) {
> - u32 j;
> -
> - for (j = 0; j < oa_config->flex_regs_len; j++) {
> - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
> - value = oa_config->flex_regs[j].value;
> - break;
> - }
> - }
> - }
> -
> - CTX_REG(reg_state, state_offset, flex_regs[i], value);
> + CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
> + oa_config_flex_reg(oa_config, flex_regs[i]));
> }
>
> CTX_REG(reg_state,
> @@ -1692,6 +1693,107 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> intel_sseu_make_rpcs(i915, &ce->sseu));
> }
>
> +struct flex {
> + i915_reg_t reg;
> + u32 offset;
> + u32 value;
> +};
> +
> +static int
> +gen8_store_flex(struct i915_request *rq,
> + struct intel_context *ce,
> + const struct flex *flex, unsigned int count)
> +{
> + u32 offset;
> + u32 *cs;
> +
> + cs = intel_ring_begin(rq, 4 * count);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
> + do {
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = offset + (flex->offset + 1) * sizeof(u32);
> + *cs++ = 0;
> + *cs++ = flex->value;
> + } while (flex++, --count);
> +
> + intel_ring_advance(rq, cs);
> +
> + return 0;
> +}
> +
> +static int
> +gen8_load_flex(struct i915_request *rq,
> + struct intel_context *ce,
> + const struct flex *flex, unsigned int count)
> +{
> + u32 *cs;
> +
> + GEM_BUG_ON(!count || count > 63);
> +
> + cs = intel_ring_begin(rq, 2 * count + 2);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + *cs++ = MI_LOAD_REGISTER_IMM(count);
> + do {
> + *cs++ = i915_mmio_reg_offset(flex->reg);
> + *cs++ = flex->value;
> + } while (flex++, --count);
> + *cs++ = MI_NOOP;
> +
> + intel_ring_advance(rq, cs);
> +
> + return 0;
> +}
> +
> +static int gen8_modify_context(struct intel_context *ce,
> + const struct flex *flex, unsigned int count)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + lockdep_assert_held(&ce->pin_mutex);
> +
> + rq = i915_request_create(ce->engine->kernel_context);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + /* Serialise with the remote context */
> + err = i915_active_request_set(&ce->ring->timeline->last_request, rq);
> + if (err)
> + goto out_add;
> +
> + /* Keep the remote context alive until after we finish editing */
> + err = i915_active_ref(&ce->active, rq->fence.context, rq);
> + if (err)
> + goto out_add;
> +
> + err = gen8_store_flex(rq, ce, flex, count);
> +
> +out_add:
> + i915_request_add(rq);
> + return err;
> +}
> +
> +static int gen8_modify_self(struct intel_context *ce,
> + const struct flex *flex, unsigned int count)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + rq = i915_request_create(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + err = gen8_load_flex(rq, ce, flex, count);
> +
> + i915_request_add(rq);
> + return err;
> +}
> +
> /*
> * Manages updating the per-context aspects of the OA stream
> * configuration across all contexts.
> @@ -1716,15 +1818,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> *
> * Note: it's only the RCS/Render context that has any OA state.
> */
> -static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> +static int gen8_configure_all_contexts(struct drm_i915_private *i915,
> const struct i915_oa_config *oa_config)
> {
> - unsigned int map_type = i915_coherent_map_type(dev_priv);
> + /* The MMIO offsets for Flex EU registers aren't contiguous */
> + const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
> +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
> + struct flex regs[] = {
> + {
> + GEN8_R_PWR_CLK_STATE,
> + CTX_R_PWR_CLK_STATE,
> + },
> + {
> + GEN8_OACTXCONTROL,
> + i915->perf.oa.ctx_oactxctrl_offset,
> + ((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> + (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> + GEN8_OA_COUNTER_RESUME)
> + },
> + { EU_PERF_CNTL0, ctx_flexeuN(0) },
> + { EU_PERF_CNTL1, ctx_flexeuN(1) },
> + { EU_PERF_CNTL2, ctx_flexeuN(2) },
> + { EU_PERF_CNTL3, ctx_flexeuN(3) },
> + { EU_PERF_CNTL4, ctx_flexeuN(4) },
> + { EU_PERF_CNTL5, ctx_flexeuN(5) },
> + { EU_PERF_CNTL6, ctx_flexeuN(6) },
> + };
> +#undef ctx_flexeuN
> + struct intel_engine_cs *engine;
> struct i915_gem_context *ctx;
> - struct i915_request *rq;
> - int ret;
> + enum intel_engine_id id;
> + int err;
> + int i;
> +
> + for (i = 2; i < ARRAY_SIZE(regs); i++)
> + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>
> - lockdep_assert_held(&dev_priv->drm.struct_mutex);
> + lockdep_assert_held(&i915->drm.struct_mutex);
>
> /*
> * The OA register config is setup through the context image. This image
> @@ -1736,58 +1866,58 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> * this might leave small interval of time where the OA unit is
> * configured at an invalid sampling period.
> *
> - * So far the best way to work around this issue seems to be draining
> - * the GPU from any submitted work.
> + * Note that since we emit all requests from a single ring, there
> + * is still an implicit global barrier here that may cause a high
> + * priority context to wait for an otherwise independent low priority
> + * context. Contexts idle at the time of reconfiguration are not
> + * trapped behind the barrier.
> */
> - ret = i915_gem_wait_for_idle(dev_priv,
> - I915_WAIT_LOCKED,
> - MAX_SCHEDULE_TIMEOUT);
> - if (ret)
> - return ret;
> -
> - /* Update all contexts now that we've stalled the submission. */
> - list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> + list_for_each_entry(ctx, &i915->contexts.list, link) {
> struct i915_gem_engines_iter it;
> struct intel_context *ce;
>
> for_each_gem_engine(ce,
> i915_gem_context_lock_engines(ctx),
> it) {
> - u32 *regs;
> -
> if (ce->engine->class != RENDER_CLASS)
> continue;
>
> - /* OA settings will be set upon first use */
> - if (!ce->state)
> - continue;
> -
> - regs = i915_gem_object_pin_map(ce->state->obj,
> - map_type);
> - if (IS_ERR(regs)) {
> - i915_gem_context_unlock_engines(ctx);
> - return PTR_ERR(regs);
> - }
> + err = intel_context_lock_pinned(ce);
> + if (err)
> + break;
>
> - ce->state->obj->mm.dirty = true;
> - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
> + regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>
> - gen8_update_reg_state_unlocked(ce, regs, oa_config);
> + /* Otherwise OA settings will be set upon first use */
> + if (intel_context_is_pinned(ce))
> + err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));
>
> - i915_gem_object_unpin_map(ce->state->obj);
> + intel_context_unlock_pinned(ce);
> + if (err)
> + break;
> }
> i915_gem_context_unlock_engines(ctx);
> + if (err)
> + return err;
> }
>
> /*
> - * Apply the configuration by doing one context restore of the edited
> - * context image.
> + * After updating all other contexts, we need to modify ourselves.
> + * If we don't modify the kernel_context, we do not get events while
> + * idle.
> */
> - rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
> - if (IS_ERR(rq))
> - return PTR_ERR(rq);
> + for_each_engine(engine, i915, id) {
> + struct intel_context *ce = engine->kernel_context;
>
> - i915_request_add(rq);
> + if (engine->class != RENDER_CLASS)
> + continue;
> +
> + regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
> +
> + err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
> + if (err)
> + return err;
> + }
>
> return 0;
> }
>