[Intel-gfx] [PATCH 1/2] drm/i915/guc: Fill preempt context once at init time

Chris Wilson chris at chris-wilson.co.uk
Mon Feb 26 14:28:36 UTC 2018


Quoting Michał Winiarski (2018-02-26 13:59:59)
> Since we're inhibiting context save of preempt context, we're no longer
> tracking the position of HEAD/TAIL. With GuC, we're adding a new
> breadcrumb for each preemption, which means that the HW will do more and
> more breadcrumb writes. Eventually the ring is filled, and we're
> submitting the preemption context with HEAD==TAIL==0, which won't result
> in breadcrumb write, but will trigger hangcheck instead.
> Instead of writing a new preempt breadcrumb for each preemption, let's
> just fill the ring once at init time (which also saves a couple of
> instructions in the tasklet).
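
(To put numbers on it: each breadcrumb is GUC_PREEMPT_BREADCRUMB_DWORDS = 8
dwords, i.e. 32 bytes, so a 4 KiB preempt ring (size picked purely for
illustration) wraps back to HEAD==TAIL==0 after 4096 / 32 = 128 preemptions.)
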
> 
> Fixes: 517aaffe0c1b ("drm/i915/execlists: Inhibit context save/restore for the fake preempt context")
> Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_guc_submission.c | 68 +++++++++++++++++------------
>  1 file changed, 41 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 586dde579903..89e5b036061d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -28,6 +28,10 @@
>  #include "intel_guc_submission.h"
>  #include "i915_drv.h"
>  
> +#define GUC_PREEMPT_FINISHED           0x1
> +#define GUC_PREEMPT_BREADCRUMB_DWORDS  0x8
> +#define GUC_PREEMPT_BREADCRUMB_BYTES   (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
> +
>  /**
>   * DOC: GuC-based command submission
>   *
> @@ -535,8 +539,6 @@ static void flush_ggtt_writes(struct i915_vma *vma)
>                 POSTING_READ_FW(GUC_STATUS);
>  }
>  
> -#define GUC_PREEMPT_FINISHED 0x1
> -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
>  static void inject_preempt_context(struct work_struct *work)
>  {
>         struct guc_preempt_work *preempt_work =
> @@ -546,37 +548,13 @@ static void inject_preempt_context(struct work_struct *work)
>                                              preempt_work[engine->id]);
>         struct intel_guc_client *client = guc->preempt_client;
>         struct guc_stage_desc *stage_desc = __get_stage_desc(client);
> -       struct intel_ring *ring = client->owner->engine[engine->id].ring;
>         u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
>                                                                  engine));
> -       u32 *cs = ring->vaddr + ring->tail;
>         u32 data[7];
>  
> -       if (engine->id == RCS) {
> -               cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
> -                               intel_hws_preempt_done_address(engine));
> -       } else {
> -               cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
> -                               intel_hws_preempt_done_address(engine));
> -               *cs++ = MI_NOOP;
> -               *cs++ = MI_NOOP;
> -       }
> -       *cs++ = MI_USER_INTERRUPT;
> -       *cs++ = MI_NOOP;
> -
> -       GEM_BUG_ON(!IS_ALIGNED(ring->size,
> -                              GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
> -       GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
> -                  GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
> -
> -       ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
> -       ring->tail &= (ring->size - 1);
> -
> -       flush_ggtt_writes(ring->vma);
> -
>         spin_lock_irq(&client->wq_lock);
>         guc_wq_item_append(client, engine->guc_id, ctx_desc,
> -                          ring->tail / sizeof(u64), 0);
> +                          GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
>         spin_unlock_irq(&client->wq_lock);
>  
>         /*
> @@ -972,6 +950,40 @@ static void guc_client_free(struct intel_guc_client *client)
>         kfree(client);
>  }
>  
> +static void guc_fill_preempt_context(struct intel_guc *guc)
> +{
> +       struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +       struct intel_guc_client *client = guc->preempt_client;
> +       struct intel_engine_cs *engine;
> +       struct intel_ring *ring;
> +       enum intel_engine_id id;
> +       u32 *cs;
> +

Whether to use preempt_client or preempt_context is your call.

> +       for_each_engine(engine, dev_priv, id) {
		struct intel_context *ce = &client->owner->engine[id];

		/* The preempt context must be pinned on each engine. */
		GEM_BUG_ON(!ce->pin_count);

		/*
		 * We rely on the context image *not* being saved after
		 * preemption. This ensures that RING_HEAD / RING_TAIL
		 * do not advance and remain pointing at this command
		 * sequence forever.
		 */
		GEM_BUG_ON(!(ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
			     CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));

> +               ring = client->owner->engine[id].ring;
> +               cs = ring->vaddr;
> +
> +               if (id == RCS) {
> +                       cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
> +                                       intel_hws_preempt_done_address(engine));
> +               } else {
> +                       cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
> +                                       intel_hws_preempt_done_address(engine));
> +                       *cs++ = MI_NOOP;
> +                       *cs++ = MI_NOOP;
> +               }
> +               *cs++ = MI_USER_INTERRUPT;
> +               *cs++ = MI_NOOP;
> +
> +               GEM_BUG_ON(!IS_ALIGNED(ring->size,
> +                          GUC_PREEMPT_BREADCRUMB_BYTES));
> +               GEM_BUG_ON((void *)cs - ring->vaddr !=
> +                          GUC_PREEMPT_BREADCRUMB_BYTES);

We don't care about those GEM_BUG_ONs anymore, as we no longer have to worry
about the instructions wrapping around the end of the ring. Similarly, we
don't need the MI_NOOP padding after MI_FLUSH_DW.
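
For illustration, the loop body could then shrink to something like this
(just a sketch, untested, reusing the same helpers the patch already uses):

		if (id == RCS) {
			cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
					intel_hws_preempt_done_address(engine));
		} else {
			cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
					intel_hws_preempt_done_address(engine));
			/* no MI_NOOP padding, we no longer rely on a fixed length */
		}
		*cs++ = MI_USER_INTERRUPT;
		*cs++ = MI_NOOP;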

Keep setting ring->tail here so we can avoid the hardcoding inside the
submission. That will allow us to adapt this with ease.
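
E.g. (again only a sketch):

		/* remember where the canned breadcrumb sequence ends */
		ring->tail = (void *)cs - ring->vaddr;

so that inject_preempt_context() can keep doing

	guc_wq_item_append(client, engine->guc_id, ctx_desc,
			   ring->tail / sizeof(u64), 0);

instead of hardcoding GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64).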
-Chris

