[Intel-gfx] [PATCH v4] drm/i915/gen11: Preempt-to-idle support in execlists.
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Mon Jul 2 17:36:57 UTC 2018
On 29/06/18 09:50, Lis, Tomasz wrote:
>
>
> On 2018-06-11 18:37, Daniele Ceraolo Spurio wrote:
>>
>>
>> On 25/05/18 11:26, Tomasz Lis wrote:
>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>> bit within Execlist Control Register, and receiving preemption result
>>> from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be
>>> executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake
>>> it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when GuC is
>>> active.
>>>
>>> v2: Added needs_preempt_context() change so that it is not created when
>>> preempt-to-idle is supported. (Chris)
>>> Updated setting HWACK flag so that it is cleared after
>>> preempt-to-dle. (Chris, Daniele)
>>> Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
>>>
>>> v3: Fixed needs_preempt_context() change. (Chris)
>>> Merged preemption trigger functions to one. (Chris)
>>> Fixed conyext state tonot assume COMPLETED_MASK after preemption,
>>> since idle-to-idle case will not have it set.
>>>
>>> v4: Simplified needs_preempt_context() change. (Daniele)
>>> Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
>>>
>>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> Cc: Michal Winiarski <michal.winiarski at intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
>>> Bspec: 18922
>>> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h | 2 +
>>> drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
>>> drivers/gpu/drm/i915/i915_pci.c | 3 +-
>>> drivers/gpu/drm/i915/intel_device_info.h | 1 +
>>> drivers/gpu/drm/i915/intel_lrc.c | 113
>>> +++++++++++++++++++++----------
>>> drivers/gpu/drm/i915/intel_lrc.h | 1 +
>>> 6 files changed, 86 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 487922f..35eddf7 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private
>>> *dev_priv)
>>> ((dev_priv)->info.has_logical_ring_elsq)
>>> #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>> ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> + ((dev_priv)->info.has_hw_preempt_to_idle)
>>> #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 45393f6..341a5ff 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context
>>> **ctxp)
>>> static bool needs_preempt_context(struct drm_i915_private *i915)
>>> {
>>> - return HAS_LOGICAL_RING_PREEMPTION(i915);
>>> + return HAS_LOGICAL_RING_PREEMPTION(i915) &&
>>> + !HAS_HW_PREEMPT_TO_IDLE(i915);
>>> }
>>> int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 97a91e6a..ee09926 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -593,7 +593,8 @@ static const struct intel_device_info
>>> intel_cannonlake_info = {
>>> GEN(11), \
>>> .ddb_size = 2048, \
>>> .has_csr = 0, \
>>> - .has_logical_ring_elsq = 1
>>> + .has_logical_ring_elsq = 1, \
>>> + .has_hw_preempt_to_idle = 1
>>> static const struct intel_device_info intel_icelake_11_info = {
>>> GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h
>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 933e316..4eb97b5 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>> func(has_logical_ring_contexts); \
>>> func(has_logical_ring_elsq); \
>>> func(has_logical_ring_preemption); \
>>> + func(has_hw_preempt_to_idle); \
>>> func(has_overlay); \
>>> func(has_pooled_eu); \
>>> func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 8a6058b..f95cb37 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -154,6 +154,7 @@
>>> #define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
>>> #define GEN8_CTX_STATUS_COMPLETE (1 << 4)
>>> #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
>>> #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>> (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -522,31 +523,46 @@ static void port_assign(struct execlist_port
>>> *port, struct i915_request *rq)
>>> static void inject_preempt_context(struct intel_engine_cs *engine)
>>
>> continuing the discussion from the previous patch, I still think that
>> we should rename this function now that it doesn't inject a context on
>> some gens. A new function name should be relatively trivial to handle
>> from other patch series hitting the area (compared to having a second
>> function).
> Ok, will rename it then.
> What would be the most adequate name? execlist_send_preempt_to_idle()?
even something simpler like "inject_preemption()" would work IMO. But
I've always been bad with naming, so I'll leave it to your judgment :)
Daniele
>>
>>> {
>>> struct intel_engine_execlists *execlists = &engine->execlists;
>>> - struct intel_context *ce =
>>> - to_intel_context(engine->i915->preempt_context, engine);
>>> - unsigned int n;
>>> -
>>> - GEM_BUG_ON(execlists->preempt_complete_status !=
>>> - upper_32_bits(ce->lrc_desc));
>>> - GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> - CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>> - /*
>>> - * Switch to our empty preempt context so
>>> - * the state of the GPU is known (idle).
>>> - */
>>> GEM_TRACE("%s\n", engine->name);
>>> - for (n = execlists_num_ports(execlists); --n; )
>>> - write_desc(execlists, 0, n);
>>> + if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
>>> + /*
>>> + * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> + * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> + */
>>> + GEM_BUG_ON(execlists->ctrl_reg == NULL);
>>> - write_desc(execlists, ce->lrc_desc, n);
>>> + /*
>>> + * If we have hardware preempt-to-idle, we do not need to
>>> + * inject any job to the hardware. We only set a flag.
>>> + */
>>> + writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> + } else {
>>> + struct intel_context *ce =
>>> + to_intel_context(engine->i915->preempt_context, engine);
>>> + unsigned int n;
>>> - /* we need to manually load the submit queue */
>>> - if (execlists->ctrl_reg)
>>> - writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> + GEM_BUG_ON(execlists->preempt_complete_status !=
>>> + upper_32_bits(ce->lrc_desc));
>>> + GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
>>> + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>>> + CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>>> +
>>> + /*
>>> + * Switch to our empty preempt context so
>>> + * the state of the GPU is known (idle).
>>> + */
>>> + for (n = execlists_num_ports(execlists); --n; )
>>> + write_desc(execlists, 0, n);
>>> +
>>> + write_desc(execlists, ce->lrc_desc, n);
>>> +
>>> + /* we need to manually load the submit queue */
>>> + if (execlists->ctrl_reg)
>>> + writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>>> + }
>>> execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>>> execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>>> @@ -1031,22 +1047,48 @@ static void process_csb(struct
>>> intel_engine_cs *engine)
>>> status, buf[2*head + 1],
>>> execlists->active);
>>> - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> - GEN8_CTX_STATUS_PREEMPTED))
>>> - execlists_set_active(execlists,
>>> - EXECLISTS_ACTIVE_HWACK);
>>> - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> - execlists_clear_active(execlists,
>>> - EXECLISTS_ACTIVE_HWACK);
>>> + /*
>>> + * Check if preempted from idle to idle directly.
>>> + * The STATUS_IDLE_ACTIVE flag is used to mark
>>> + * such transition.
>>> + */
>>> + if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>>> + (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>>> +
>>> + /* Cannot be waiting for HWACK while HW is idle */
>>
>> This comment does not match the check, since if the
>> EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not
>> that we're waiting for it. Personally I would just remove the BUG_ON
>> since we don't really care about the value of HWACK as long as
>> EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not
>> going to submit work until the whole preempt process is complete. A
>> BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in
>> complete_preempt_context so we're covered on that side.
> Will remove.
>>
>> With the 2 minor comments addressed:
>>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>
>> Daniele
>>
>>> + GEM_BUG_ON(execlists_is_active(execlists,
>>> + EXECLISTS_ACTIVE_HWACK));
>>> - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> - continue;
>>> + /*
>>> + * We could not have COMPLETED anything
>>> + * if we were idle before preemption.
>>> + */
>>> + GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
>>> + } else {
>>> + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> + GEN8_CTX_STATUS_PREEMPTED))
>>> + execlists_set_active(execlists,
>>> + EXECLISTS_ACTIVE_HWACK);
>>> +
>>> + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> + execlists_clear_active(execlists,
>>> + EXECLISTS_ACTIVE_HWACK);
>>> +
>>> + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>>> + continue;
>>> +
>>> + /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> + }
>>> - /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>> - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>> - if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> - buf[2*head + 1] ==
>>> execlists->preempt_complete_status) {
>>> + /*
>>> + * Check if preempted to real idle, either directly or
>>> + * the preemptive context already finished executing
>>> + */
>>> + if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> + (status & GEN8_CTX_STATUS_COMPLETE &&
>>> + buf[2*head + 1] ==
>>> execlists->preempt_complete_status)) {
>>> GEM_TRACE("%s preempt-idle\n", engine->name);
>>> complete_preempt_context(execlists);
>>> continue;
>>> @@ -2337,7 +2379,8 @@ static void
>>> execlists_set_default_submission(struct intel_engine_cs *engine)
>>> engine->unpark = NULL;
>>> engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>>> - if (engine->i915->preempt_context)
>>> + if (engine->i915->preempt_context ||
>>> + HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>>> engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>>> engine->i915->caps.scheduler =
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h
>>> b/drivers/gpu/drm/i915/intel_lrc.h
>>> index 1593194..3249e9b 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>>> @@ -45,6 +45,7 @@
>>> #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base
>>> + 0x510)
>>> #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base +
>>> 0x550)
>>> #define EL_CTRL_LOAD (1 << 0)
>>> +#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)
>>> /* The docs specify that the write pointer wraps around after 5h,
>>> "After status
>>> * is written out to the last available status QW at offset 5h,
>>> this pointer
>>>
>
More information about the Intel-gfx
mailing list