[Intel-gfx] [PATCH 08/21] drm/i915/guc: Make use of the SW counter field in the context descriptor

Lis, Tomasz tomasz.lis at intel.com
Fri Aug 31 15:31:39 UTC 2018



On 2018-08-30 16:15, Lis, Tomasz wrote:
>
>
> On 2018-08-30 02:08, Lionel Landwerlin wrote:
>> On 29/08/2018 20:16, Michal Wajdeczko wrote:
>>> The new context descriptor format contains two assignable fields:
>>> the SW Context ID (technically 11 bits, but practically limited to 2032
>>> entries due to some being reserved for future use by the GuC) and the
>>> SW Counter (6 bits).
>>>
>>> We don't want to limit ourselves too much in the maximum number of
>>> concurrent contexts we want to allow, so ideally we want to employ
>>> every possible bit available. Unfortunately, a further limitation in
>>> the interface with the GuC means the combination of SW Context ID +
>>> SW Counter has to be unique within the same engine class (as we use
>>> the SW Context ID to index in the GuC stage descriptor pool, and the
>>> Engine Class + SW Counter to index in the 2-dimensional lrc array).
>>> This essentially means we need to somehow encode the engine instance.
>>>
>>> Since the BSpec allows 6 bits for engine instance, we use the whole
>>> SW counter for this task. If the limitation of 2032 maximum 
>>> simultaneous
>>> contexts is too restrictive, we can always squeeze things a bit more
>>> (3 extras bits for hw_id, 3 bits for instance) and things will still
>>> work (Gen11 does not instance more than 8 engines of any class).
>>>
>>> Another alternative would be to generate the hw_id per HW context
>>> instead of per GEM context, but that has other problems (e.g. maximum
>>> number of user-created contexts would be variable, no relationship
>>> between a GuC principal descriptor and the proxy descriptor it uses, 
>>> ...)
>>>
>>> Bspec: 12254
>>>
>>> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
>>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
>>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> Cc: Michel Thierry <michel.thierry at intel.com>
> Tested-by: Tomasz Lis <tomasz.lis at intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h         | 15 +++++++++++----
>>>   drivers/gpu/drm/i915/i915_gem_context.c |  5 ++++-
>>>   drivers/gpu/drm/i915/i915_gem_context.h |  2 ++
>>>   drivers/gpu/drm/i915/i915_reg.h         |  2 ++
>>>   drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++++++---
>>>   5 files changed, 28 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index e5b9d3c..34f5495 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -1866,14 +1866,21 @@ struct drm_i915_private {
>>>           struct llist_head free_list;
>>>           struct work_struct free_work;
>>>   -        /* The hw wants to have a stable context identifier for the
>>> +        /*
>>> +         * The HW wants to have a stable context identifier for the
>>>            * lifetime of the context (for OA, PASID, faults, etc).
>>>            * This is limited in execlists to 21 bits.
>>> +         * In enhanced execlist (GEN11+) this is limited to 11 bits
>>> +         * (the SW Context ID field) but GuC limits it a bit further
>>> +         * (11 bits - 16) due to some entries being reserved for 
>>> future
>>> +         * use (so the firmware only supports a GuC stage descriptor
>>> +         * pool of 2032 entries).
>>>            */
>>>           struct ida hw_ida;
>>> -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
>>> -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
>>> -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
>>> +#define MAX_CONTEXT_HW_ID            (1 << 21) /* exclusive */
>>> +#define MAX_GUC_CONTEXT_HW_ID            (1 << 20) /* exclusive */
>>> +#define GEN11_MAX_CONTEXT_HW_ID            (1 << 11) /* exclusive */
>>> +#define GEN11_MAX_CONTEXT_HW_ID_WITH_GUC (GEN11_MAX_CONTEXT_HW_ID - 
>>> 16)
>>>       } contexts;
>>>         u32 fdi_rx_config;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index f15a039..e3b500c 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -209,7 +209,10 @@ static int assign_hw_id(struct drm_i915_private 
>>> *dev_priv, unsigned *out)
>>>       unsigned int max;
>>>         if (INTEL_GEN(dev_priv) >= 11) {
>>> -        max = GEN11_MAX_CONTEXT_HW_ID;
>>> +        if (USES_GUC_SUBMISSION(dev_priv))
>>> +            max = GEN11_MAX_CONTEXT_HW_ID_WITH_GUC;
>>> +        else
>>> +            max = GEN11_MAX_CONTEXT_HW_ID;
>>>       } else {
>>>           /*
>>>            * When using GuC in proxy submission, GuC consumes the
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
>>> b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 851dad6..4b87f5d 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -154,6 +154,8 @@ struct i915_gem_context {
>>>           struct intel_ring *ring;
>>>           u32 *lrc_reg_state;
>>>           u64 lrc_desc;
>>> +        u32 sw_context_id;
>>> +        u32 sw_counter;
>>>           int pin_count;
>>>             const struct intel_context_ops *ops;
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index f232178..ea65d7b 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -3900,6 +3900,8 @@ enum {
>>>   #define GEN8_CTX_ID_WIDTH 21
>>>   #define GEN11_SW_CTX_ID_SHIFT 37
>>>   #define GEN11_SW_CTX_ID_WIDTH 11
>>> +#define GEN11_SW_COUNTER_SHIFT 55
>>> +#define GEN11_SW_COUNTER_WIDTH 6
>>>   #define GEN11_ENGINE_CLASS_SHIFT 61
>>>   #define GEN11_ENGINE_CLASS_WIDTH 3
>>>   #define GEN11_ENGINE_INSTANCE_SHIFT 48
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index f4b9972..3001a14 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -240,14 +240,15 @@ static inline bool need_preempt(const struct 
>>> intel_engine_cs *engine,
>>>        * anything below.
>>>        */
>>>       if (INTEL_GEN(ctx->i915) >= 11) {
>>
>>
>> Hey Michal,
>>
>> There is a comment just above the if () about updating the 
>> i915_perf.c (oa_get_render_ctx_id) when descriptor is updated.
>> Otherwise this will break some part of the observability feature.
>> You can verify this with the IGT tests/perf 
>> --run-subtest=gen8-unprivileged-single-ctx-counters
>>
>> Thanks a lot,
>>
>> -
>> Lionel
> Tested on KBL; works fine for both enable_guc=2 and enable_guc=3.
>
> ./tests/perf --run-subtest=gen8-unprivileged-single-ctx-counters
> IGT-Version: 1.22-g11db680 (x86_64) (Linux: 4.19.0-rc1tli+ x86_64)
> Subtest gen8-unprivileged-single-ctx-counters: SUCCESS (0,058s)
>
> -Tomasz
Took a bit longer, but tested on ICL as well. Test passes for 
enable_guc=1 and 3.
There is still an issue with enable_guc=2, but that's not related to 
observability feature (and out of scope of this review).

-Tomasz
>>> -        GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
>>> -        desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
>>> +        GEM_BUG_ON(ce->sw_context_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
>>> +        desc |= (u64)ce->sw_context_id << GEN11_SW_CTX_ID_SHIFT;
>>>                                   /* bits 37-47 */
>>>             desc |= (u64)engine->instance << 
>>> GEN11_ENGINE_INSTANCE_SHIFT;
>>>                                   /* bits 48-53 */
>>>   -        /* TODO: decide what to do with SW counter (bits 55-60) */
>>> +        desc |= (u64)ce->sw_counter << GEN11_SW_COUNTER_SHIFT;
>>> +                                /* bits 55-60 */
>>>             /*
>>>            * Although GuC will never see this upper part as it fills
>>> @@ -2771,6 +2772,11 @@ static int 
>>> execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>>>       ce->ring = ring;
>>>       ce->state = vma;
>>>   +    if (INTEL_GEN(ctx->i915) >= 11) {
>>> +        ce->sw_context_id = ctx->hw_id;
>>> +        ce->sw_counter = engine->instance;
>>> +    }
>>> +
>>>       return 0;
>>>     error_ring_free:
>>
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx



More information about the Intel-gfx mailing list