[Intel-gfx] [PATCH 3/4] drm/i915/tgl: Gen12 csb support
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Wed Jul 31 17:33:31 UTC 2019
On 7/30/19 11:29 PM, Tvrtko Ursulin wrote:
>
> On 31/07/2019 01:49, Daniele Ceraolo Spurio wrote:
>> The CSB format has been reworked for Gen12 to include information on
>> both the context we're switching away from and the context we're
>> switching to. After the change, some of the events don't have their
>> own bit anymore and need to be inferred from other values in the csb.
>> One of the context IDs (0x7FF) has also been reserved to indicate
>> the invalid ctx, i.e. engine idle.
>>
>> Note that the full context ID includes the SW counter as well, but since
>> we currently only care if the context is valid or not we can ignore that
>> part.
>>
>> Bspec: 45555, 46144
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 ++++
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 73 +++++++++++++++++---
>> 2 files changed, 78 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index da61dd329210..98adc764d4f8 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -139,6 +139,13 @@ struct st_preempt_hang {
>> bool inject_hang;
>> };
>> +enum intel_csb_step {
>> + CSB_NOP,
>> + CSB_PROMOTE,
>> + CSB_PREEMPT,
>> + CSB_COMPLETE,
>> +};
>> +
>> /**
>> * struct intel_engine_execlists - execlist submission queue and
>> port state
>> *
>> @@ -251,6 +258,12 @@ struct intel_engine_execlists {
>> */
>> u8 csb_head;
>> + /**
>> + * @csb_parse: platform-specific function to parse the status buffer
>> + */
>> + enum intel_csb_step
>> + (*csb_parse)(const struct intel_engine_execlists *, const u32 *csb);
>
> Nitpick - inconsistent naming and not-naming function parameters.
>
>> +
>> I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
>> };
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index c379184ac987..00afdcd71bd4 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -163,6 +163,13 @@
>> #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
>> +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb
>> dword */
>> +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper
>> csb dword */
>> +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK_ULL(25, 15)
>
> Does this need to be ULL? Seems to only been used on u32 types. Maybe it
> makes no practical difference...
Nope, the RFC worked on the 64b status and so needed ULL, I forgot to
scale it down when moving to working on the separate dwords.
>
>> +#define GEN12_IDLE_CTX_ID 0x7FF
>> +#define GEN12_CSB_CTX_VALID(csb_dw) \
>> + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
>> +
>> /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>> #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>> #define WA_TAIL_DWORDS 2
>> @@ -1315,14 +1322,59 @@ reset_in_progress(const struct
>> intel_engine_execlists *execlists)
>> return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>> }
>> -enum csb_step {
>> - CSB_NOP,
>> - CSB_PROMOTE,
>> - CSB_PREEMPT,
>> - CSB_COMPLETE,
>> -};
>> +/*
>> + * Starting with Gen12, the status has a new format:
>> + *
>> + * bit 0: switched to new queue
>> + * bit 1: reserved
>> + * bit 2: semaphore wait mode (poll or signal), Only valid when
>
> Nitpick - random capitalized word in a sea of non-sentences.
>
>> + * switch detail is set to "wait on semaphore"
>> + * bits 3-5: engine class
>> + * bits 6-11: engine instance
>> + * bits 12-14: reserved
>> + * bits 15-25: sw context id of the lrc we're switching to
>> + * bits 26-31: sw counter of the lrc we're switching to
>
> I'd perhaps drop the "we are" language from here since it is not we
> (driver) but represents what GPU thinks is happening. "sw context id of
> the lrc GPU switched to"?
>
ack
>> + * bits 32-35: context switch detail
>> + * - 0: ctx complete
>> + * - 1: wait on sync flip
>> + * - 2: wait on vblank
>> + * - 3: wait on scanline
>> + * - 4: wait on semaphore
>> + * - 5: context preempted (not on SEMAPHORE_WAIT or
>> + * WAIT_FOR_EVENT)
>> + * bit 36: reserved
>> + * bits 37-43: wait detail (for switch detail 1 to 4)
>> + * bits 44-46: reserved
>> + * bits 47-57: sw context id of the lrc we're switching away from
>> + * bits 58-63: sw counter of the lrc we're switching away from
>> + */
>> +static enum intel_csb_step
>> +gen12_csb_parse(const struct intel_engine_execlists *execlists, const
>> u32 *csb)
>> +{
>> + u32 lower_dw = csb[0];
>> + u32 upper_dw = csb[1];
>> + bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
>> + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
>> + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>> +
>> + if (!ctx_away_valid && ctx_to_valid)
>> + return CSB_PROMOTE;
>> +
>> + if (new_queue && ctx_away_valid)
>> + return CSB_PREEMPT;
>> -static inline enum csb_step
>> + /* we do not expect a ctx switch on unsuccessful wait */
>
> What is a wait in this context? Oh the wait ctx switch detail values.
This is the case where the context switches out on a WAIT_FOR_EVENT or
SEMAPHORE_WAIT because the instruction has been set to switch out of the
condition is not satisfied (i.e. not polling mode)
> What about 5 = preempted? Guaranteed to be handled already with the
> "new_queue & ctx_away_valid" check? Should you add
> GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(udw) != 5) on that branch? And expand
> the comment against this assert to explain the above?
>
It should be guaranteed by the case above, but we can't add a BUG_ON
above because when preempting a polling semaphore we get switch_detail =
4 and on a lite restore the switch_detail is undefined.
>> + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
>> +
>> + if (*execlists->active) {
>> + GEM_BUG_ON(!ctx_away_valid);
>> + return CSB_COMPLETE;
>> + }
>> +
>> + return CSB_NOP;
>> +}
>> +
>> +static enum intel_csb_step
>> csb_parse(const struct intel_engine_execlists *execlists, const u32
>> *csb)
>> {
>> unsigned int status = *csb;
>> @@ -1401,7 +1453,7 @@ static void process_csb(struct intel_engine_cs
>> *engine)
>> engine->name, head,
>> buf[2 * head + 0], buf[2 * head + 1]);
>> - switch (csb_parse(execlists, buf + 2 * head)) {
>> + switch (execlists->csb_parse(execlists, buf + 2 * head)) {
>> case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
>> trace_ports(execlists, "preempted", execlists->active);
>> @@ -2878,6 +2930,11 @@ int intel_execlists_submission_init(struct
>> intel_engine_cs *engine)
>> else
>> execlists->csb_size = GEN11_CSB_ENTRIES;
>> + if (INTEL_GEN(i915) >= 12)
>> + execlists->csb_parse = gen12_csb_parse;
>> + else
>> + execlists->csb_parse = csb_parse;
>> +
>> reset_csb_pointers(engine);
>> return 0;
>>
>
> Looks simple. Do you have a reviewer who already knows how Gen12 CSB
> works or someone will need to read up on it to give a proper r-b?
Unfortunately I need a new reviewer (Michel was the one with the
knowledge). Are you volunteering? :)
Daniele
>
> Regards,
>
> Tvrtko
More information about the Intel-gfx
mailing list