[Intel-gfx] [PATCH] i915/oa: Simplify updating contexts
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Wed Sep 12 15:46:30 UTC 2018
On 12/09/2018 16:29, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> We can remove the update-via-batch-buffer code path, which is effectively
> a duplicate of the update-via-context-image path, once we notice that,
> after we have idled the GPU, we can update even the kernel context image
> directly. (The update-via-batch-buffer path existed only to solve the
> problem of how to update the kernel context image.)
>
> The only additional thing needed is to activate the edited configuration
> by sending one empty request down the pipe. This triggers a context
> restore of the updated kernel context, and so the OA configuration gets
> written out to its control registers.
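
Condensed from the final hunk below (error paths trimmed), the resulting
sequence is:

    /* 1) Quiesce the GPU so no context image is being saved behind us. */
    ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED,
                                 MAX_SCHEDULE_TIMEOUT);
    if (ret)
            return ret;

    /* 2) Edit every context image in place, including the one belonging
     *    to dev_priv->kernel_context (see the loop sketched above). */

    /* 3) One empty request: its context restore of the kernel context
     *    loads the edited image, writing the OA config to the registers. */
    rq = i915_request_alloc(engine, dev_priv->kernel_context);
    if (IS_ERR(rq))
            return PTR_ERR(rq);

    i915_request_add(rq);
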
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
After the discussion we had on IRC about context save/restore timings,
this looks good to me:
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_perf.c | 122 ++++---------------------------
> 1 file changed, 13 insertions(+), 109 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index ccb20230df2c..3d7a052b4cca 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1679,107 +1679,6 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
> }
> }
>
> -/*
> - * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This
> - * is only used by the kernel context.
> - */
> -static int gen8_emit_oa_config(struct i915_request *rq,
> - const struct i915_oa_config *oa_config)
> -{
> - struct drm_i915_private *dev_priv = rq->i915;
> - /* The MMIO offsets for Flex EU registers aren't contiguous */
> - u32 flex_mmio[] = {
> - i915_mmio_reg_offset(EU_PERF_CNTL0),
> - i915_mmio_reg_offset(EU_PERF_CNTL1),
> - i915_mmio_reg_offset(EU_PERF_CNTL2),
> - i915_mmio_reg_offset(EU_PERF_CNTL3),
> - i915_mmio_reg_offset(EU_PERF_CNTL4),
> - i915_mmio_reg_offset(EU_PERF_CNTL5),
> - i915_mmio_reg_offset(EU_PERF_CNTL6),
> - };
> - u32 *cs;
> - int i;
> -
> - cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4);
> - if (IS_ERR(cs))
> - return PTR_ERR(cs);
> -
> - *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
> -
> - *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
> - *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> - (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> - GEN8_OA_COUNTER_RESUME;
> -
> - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
> - u32 mmio = flex_mmio[i];
> -
> - /*
> - * This arbitrary default will select the 'EU FPU0 Pipeline
> - * Active' event. In the future it's anticipated that there
> - * will be an explicit 'No Event' we can select, but not
> - * yet...
> - */
> - u32 value = 0;
> -
> - if (oa_config) {
> - u32 j;
> -
> - for (j = 0; j < oa_config->flex_regs_len; j++) {
> - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
> - value = oa_config->flex_regs[j].value;
> - break;
> - }
> - }
> - }
> -
> - *cs++ = mmio;
> - *cs++ = value;
> - }
> -
> - *cs++ = MI_NOOP;
> - intel_ring_advance(rq, cs);
> -
> - return 0;
> -}
> -
> -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
> - const struct i915_oa_config *oa_config)
> -{
> - struct intel_engine_cs *engine = dev_priv->engine[RCS];
> - struct i915_timeline *timeline;
> - struct i915_request *rq;
> - int ret;
> -
> - lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
> - i915_retire_requests(dev_priv);
> -
> - rq = i915_request_alloc(engine, dev_priv->kernel_context);
> - if (IS_ERR(rq))
> - return PTR_ERR(rq);
> -
> - ret = gen8_emit_oa_config(rq, oa_config);
> - if (ret) {
> - i915_request_add(rq);
> - return ret;
> - }
> -
> - /* Queue this switch after all other activity */
> - list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
> - struct i915_request *prev;
> -
> - prev = i915_gem_active_raw(&timeline->last_request,
> - &dev_priv->drm.struct_mutex);
> - if (prev)
> - i915_request_await_dma_fence(rq, &prev->fence);
> - }
> -
> - i915_request_add(rq);
> -
> - return 0;
> -}
> -
> /*
> * Manages updating the per-context aspects of the OA stream
> * configuration across all contexts.
> @@ -1809,16 +1708,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> {
> struct intel_engine_cs *engine = dev_priv->engine[RCS];
> struct i915_gem_context *ctx;
> + struct i915_request *rq;
> int ret;
> - unsigned int wait_flags = I915_WAIT_LOCKED;
>
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
> - /* Switch away from any user context. */
> - ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
> - if (ret)
> - return ret;
> -
> /*
> * The OA register config is setup through the context image. This image
> * might be written to by the GPU on context switch (in particular on
> @@ -1833,7 +1727,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> * the GPU from any submitted work.
> */
> ret = i915_gem_wait_for_idle(dev_priv,
> - wait_flags,
> + I915_WAIT_LOCKED,
> MAX_SCHEDULE_TIMEOUT);
> if (ret)
> return ret;
> @@ -1859,7 +1753,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> i915_gem_object_unpin_map(ce->state->obj);
> }
>
> - return ret;
> + /*
> + * Apply the configuration by doing one context restore of the edited
> + * context image.
> + */
> + rq = i915_request_alloc(engine, dev_priv->kernel_context);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_add(rq);
> +
> + return 0;
> }
>
> static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,