[Intel-gfx] [PATCH 3/4] drm/i915/tgl: Gen12 csb support

Wed Jul 31 17:33:31 UTC 2019

On 7/30/19 11:29 PM, Tvrtko Ursulin wrote:
> 
> On 31/07/2019 01:49, Daniele Ceraolo Spurio wrote:
>> The CSB format has been reworked for Gen12 to include information on
>> both the context we're switching away from and the context we're
>> switching to. After the change, some of the events don't have their
>> own bit anymore and need to be inferred from other values in the csb.
>> One of the context IDs (0x7FF) has also been reserved to indicate
>> the invalid ctx, i.e. engine idle.
>>
>> Note that the full context ID includes the SW counter as well, but since
>> we currently only care if the context is valid or not we can ignore that
>> part.
>>
>> Bspec: 45555, 46144
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 ++++
>>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 73 +++++++++++++++++---
>>   2 files changed, 78 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
>> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index da61dd329210..98adc764d4f8 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -139,6 +139,13 @@ struct st_preempt_hang {
>>       bool inject_hang;
>>   };
>> +enum intel_csb_step {
>> +    CSB_NOP,
>> +    CSB_PROMOTE,
>> +    CSB_PREEMPT,
>> +    CSB_COMPLETE,
>> +};
>> +
>>   /**
>>    * struct intel_engine_execlists - execlist submission queue and 
>> port state
>>    *
>> @@ -251,6 +258,12 @@ struct intel_engine_execlists {
>>        */
>>       u8 csb_head;
>> +    /**
>> +     * @csb_parse: platform-specific function to parse the status buffer
>> +     */
>> +    enum intel_csb_step
>> +    (*csb_parse)(const struct intel_engine_execlists *, const u32 *csb);
> 
> Nitpick - inconsistent naming and not-naming function parameters.
> 
>> +
>>       I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
>>   };
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index c379184ac987..00afdcd71bd4 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -163,6 +163,13 @@
>>   #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
>> +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE    (0x1) /* lower csb 
>> dword */
>> +#define GEN12_CTX_SWITCH_DETAIL(csb_dw)    ((csb_dw) & 0xF) /* upper 
>> csb dword */
>> +#define GEN12_CSB_SW_CTX_ID_MASK        GENMASK_ULL(25, 15)
> 
> Does this need to be ULL? Seems to only been used on u32 types. Maybe it 
> makes no practical difference...

Nope, the RFC worked on the 64b status and so needed ULL, I forgot to 
scale it down when moving to working on the separate dwords.

> 
>> +#define GEN12_IDLE_CTX_ID        0x7FF
>> +#define GEN12_CSB_CTX_VALID(csb_dw)    \
>> +    (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
>> +
>>   /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>>   #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>>   #define WA_TAIL_DWORDS 2
>> @@ -1315,14 +1322,59 @@ reset_in_progress(const struct 
>> intel_engine_execlists *execlists)
>>       return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>>   }
>> -enum csb_step {
>> -    CSB_NOP,
>> -    CSB_PROMOTE,
>> -    CSB_PREEMPT,
>> -    CSB_COMPLETE,
>> -};
>> +/*
>> + * Starting with Gen12, the status has a new format:
>> + *
>> + *     bit  0:     switched to new queue
>> + *     bit  1:     reserved
>> + *     bit  2:     semaphore wait mode (poll or signal), Only valid when
> 
> Nitpick - random capitalized word in a sea of non-sentences.
> 
>> + *                 switch detail is set to "wait on semaphore"
>> + *     bits 3-5:   engine class
>> + *     bits 6-11:  engine instance
>> + *     bits 12-14: reserved
>> + *     bits 15-25: sw context id of the lrc we're switching to
>> + *     bits 26-31: sw counter of the lrc we're switching to
> 
> I'd perhaps drop the "we are" language from here since it is not we 
> (driver) but represents what GPU thinks is happening. "sw context id of 
> the lrc GPU switched to"?
> 

ack

>> + *     bits 32-35: context switch detail
>> + *                  - 0: ctx complete
>> + *                  - 1: wait on sync flip
>> + *                  - 2: wait on vblank
>> + *                  - 3: wait on scanline
>> + *                  - 4: wait on semaphore
>> + *                  - 5: context preempted (not on SEMAPHORE_WAIT or
>> + *                       WAIT_FOR_EVENT)
>> + *     bit  36:    reserved
>> + *     bits 37-43: wait detail (for switch detail 1 to 4)
>> + *     bits 44-46: reserved
>> + *     bits 47-57: sw context id of the lrc we're switching away from
>> + *     bits 58-63: sw counter of the lrc we're switching away from
>> + */
>> +static enum intel_csb_step
>> +gen12_csb_parse(const struct intel_engine_execlists *execlists, const 
>> u32 *csb)
>> +{
>> +    u32 lower_dw = csb[0];
>> +    u32 upper_dw = csb[1];
>> +    bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
>> +    bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
>> +    bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>> +
>> +    if (!ctx_away_valid && ctx_to_valid)
>> +        return CSB_PROMOTE;
>> +
>> +    if (new_queue && ctx_away_valid)
>> +        return CSB_PREEMPT;
>> -static inline enum csb_step
>> +    /* we do not expect a ctx switch on unsuccessful wait */
> 
> What is a wait in this context? Oh the wait ctx switch detail values. 

This is the case where the context switches out on a WAIT_FOR_EVENT or 
SEMAPHORE_WAIT because the instruction has been set to switch out of the 
condition is not satisfied (i.e. not polling mode)

> What about 5 = preempted? Guaranteed to be handled already with the 
> "new_queue & ctx_away_valid" check? Should you add 
> GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(udw) != 5) on that branch? And expand 
> the comment against this assert to explain the above?
> 

It should be guaranteed by the case above, but we can't add a BUG_ON 
above because when preempting a polling semaphore we get switch_detail = 
4 and on a lite restore the switch_detail is undefined.

>> +    GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
>> +
>> +    if (*execlists->active) {
>> +        GEM_BUG_ON(!ctx_away_valid);
>> +        return CSB_COMPLETE;
>> +    }
>> +
>> +    return CSB_NOP;
>> +}
>> +
>> +static enum intel_csb_step
>>   csb_parse(const struct intel_engine_execlists *execlists, const u32 
>> *csb)
>>   {
>>       unsigned int status = *csb;
>> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs 
>> *engine)
>>                 engine->name, head,
>>                 buf[2 * head + 0], buf[2 * head + 1]);
>> -        switch (csb_parse(execlists, buf + 2 * head)) {
>> +        switch (execlists->csb_parse(execlists, buf + 2 * head)) {
>>           case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
>>               trace_ports(execlists, "preempted", execlists->active);
>> @@ -2878,6 +2930,11 @@ int intel_execlists_submission_init(struct 
>> intel_engine_cs *engine)
>>       else
>>           execlists->csb_size = GEN11_CSB_ENTRIES;
>> +    if (INTEL_GEN(i915) >= 12)
>> +        execlists->csb_parse = gen12_csb_parse;
>> +    else
>> +        execlists->csb_parse = csb_parse;
>> +
>>       reset_csb_pointers(engine);
>>       return 0;
>>
> 
> Looks simple. Do you have a reviewer who already knows how Gen12 CSB 
> works or someone will need to read up on it to give a proper r-b?

Unfortunately I need a new reviewer (Michel was the one with the 
knowledge). Are you volunteering? :)

Daniele

> 
> Regards,
> 
> Tvrtko