[Intel-gfx] [PATCH v1] drm/i915/gen11: Preempt-to-idle support in execlists.

Tue Mar 27 23:27:10 UTC 2018

Quoting Tomasz Lis (2018-03-27 16:17:59)
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>  5 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 800230b..c32580b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>                 ((dev_priv)->info.has_logical_ring_elsq)
>  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>                 ((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>  
>  #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4364922..66b6700 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>         GEN(11), \
>         .ddb_size = 2048, \
>         .has_csr = 0, \
> -       .has_logical_ring_elsq = 1
> +       .has_logical_ring_elsq = 1, \
> +       .has_hw_preempt_to_idle = 1
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>         GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>         func(has_logical_ring_contexts); \
>         func(has_logical_ring_elsq); \
>         func(has_logical_ring_preemption); \
> +       func(has_hw_preempt_to_idle); \
>         func(has_overlay); \
>         func(has_pooled_eu); \
>         func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ba7f783..1a22de4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -153,6 +153,7 @@
>  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>  
>  #define GEN8_CTX_STATUS_COMPLETED_MASK \
>          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>                                 const struct i915_request *last,
>                                 int prio)
>  {
> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +       return (engine->i915->preempt_context ||
> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&

Well, you haven't actually disabled allocating the preempt_context so...

But at any rate, making this an engine->flag would eliminate one pointer
dance.

> +                prio > max(rq_prio(last), 0);
>  }
>  
>  /**
> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>         execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> +{
> +       struct intel_engine_execlists *execlists = &engine->execlists;
> +
> +       GEM_TRACE("%s\n", engine->name);
> +
> +       /*
> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +        */
> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
> +
> +       /* trigger preemption to idle */
> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);

Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.

> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                                   status, buf[2*head + 1],
>                                   execlists->active);
>  
> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -                                     GEN8_CTX_STATUS_PREEMPTED))
> +                       /* Check if switched to active or preempted to active */
> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))

Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.

>                                 execlists_set_active(execlists,
>                                                      EXECLISTS_ACTIVE_HWACK);
> +
>                         if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>                                 execlists_clear_active(execlists,
>                                                        EXECLISTS_ACTIVE_HWACK);
> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                         /* We should never get a COMPLETED | IDLE_ACTIVE! */
>                         GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>  
> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
> +                       /*
> +                        * Check if preempted to real idle, either directly or
> +                        * the preemptive context already finished executing
> +                        */
> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>                                 GEM_TRACE("%s preempt-idle\n", engine->name);

Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris