[Intel-gfx] [PATCH 08/27] drm/i915: Add logical engine mapping
Matthew Brost
matthew.brost at intel.com
Mon Sep 13 16:50:30 UTC 2021
On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:
>
> On 10/09/2021 20:49, Matthew Brost wrote:
> > On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
> > >
> > > On 20/08/2021 23:44, Matthew Brost wrote:
> > > > Add logical engine mapping. This is required for split-frame, as
> > > > workloads need to be placed on engines in a logically contiguous manner.
> > > >
> > > > v2:
> > > > (Daniel Vetter)
> > > > - Add kernel doc for new fields
> > > >
> > > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > > ---
> > > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 60 ++++++++++++++++---
> > > > drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 ++
> > > > .../drm/i915/gt/intel_execlists_submission.c | 1 +
> > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +-
> > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
> > > > 5 files changed, 60 insertions(+), 29 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > index 0d9105a31d84..4d790f9a65dd 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
> > > > GEM_DEBUG_WARN_ON(iir);
> > > > }
> > > > -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > > +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
> > > > + u8 logical_instance)
> > > > {
> > > > const struct engine_info *info = &intel_engines[id];
> > > > struct drm_i915_private *i915 = gt->i915;
> > > > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > > engine->class = info->class;
> > > > engine->instance = info->instance;
> > > > + engine->logical_mask = BIT(logical_instance);
> > > > __sprint_engine_name(engine);
> > > > engine->props.heartbeat_interval_ms =
> > > > @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
> > > > return info->engine_mask;
> > > > }
> > > > +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
> > > > + u8 class, const u8 *map, u8 num_instances)
> > > > +{
> > > > + int i, j;
> > > > + u8 current_logical_id = 0;
> > > > +
> > > > + for (j = 0; j < num_instances; ++j) {
> > > > + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > + if (!HAS_ENGINE(gt, i) ||
> > > > + intel_engines[i].class != class)
> > > > + continue;
> > > > +
> > > > + if (intel_engines[i].instance == map[j]) {
> > > > + logical_ids[intel_engines[i].instance] =
> > > > + current_logical_id++;
> > > > + break;
> > > > + }
> > > > + }
> > > > + }
> > > > +}
> > > > +
> > > > +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
> > > > +{
> > > > + int i;
> > > > + u8 map[MAX_ENGINE_INSTANCE + 1];
> > > > +
> > > > + for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> > > > + map[i] = i;
> > >
> > > What's the point of the map array since it is 1:1 with instance?
> > >
> >
> > Future products do not have a 1 to 1 mapping and that mapping can change
> > based on fusing, e.g. XeHP SDV.
> >
> > Also technically ICL / TGL / ADL physical instance 2 maps to logical
> > instance 1.
>
> I don't follow the argument. All I can see is that "map[i] = i" always in
> the proposed code, which is then used to check "instance == map[instance]".
> So I'd suggest to remove this array from the code until there is a need for
> it.
>
Ok, this logic is slightly confusing and makes more sense once we have
non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default
with the value in map[i] being a physical instance. Populate_logical_ids
searches the map finding all physical instances present in the map
assigning each found instance a new logical id increasing by 1 each
time.
e.g. If the map is setup 0-N and only physical instance 0 / 2 are
present they will get logical mapping 0 / 1 respectfully.
This algorithm works for non-standard mappings too /w fused parts. e.g.
on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the
physical instances can't be found due to fusing the logical mapping is
still correct per the bspec.
This array is absolutely needed for multi-lrc submission to work, even
on ICL / TGL / ADL as the GuC only supports logically contiguous engine
instances.
> > > > + populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
> > > > +}
> > > > +
> > > > /**
> > > > * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
> > > > * @gt: pointer to struct intel_gt
> > > > @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > > struct drm_i915_private *i915 = gt->i915;
> > > > const unsigned int engine_mask = init_engine_mask(gt);
> > > > unsigned int mask = 0;
> > > > - unsigned int i;
> > > > + unsigned int i, class;
> > > > + u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
> > > > int err;
> > > > drm_WARN_ON(&i915->drm, engine_mask == 0);
> > > > @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > > if (i915_inject_probe_failure(i915))
> > > > return -ENODEV;
> > > > - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> > > > - if (!HAS_ENGINE(gt, i))
> > > > - continue;
> > > > + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
> > > > + setup_logical_ids(gt, logical_ids, class);
> > > > - err = intel_engine_setup(gt, i);
> > > > - if (err)
> > > > - goto cleanup;
> > > > + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > + u8 instance = intel_engines[i].instance;
> > > > +
> > > > + if (intel_engines[i].class != class ||
> > > > + !HAS_ENGINE(gt, i))
> > > > + continue;
> > > > - mask |= BIT(i);
> > > > + err = intel_engine_setup(gt, i,
> > > > + logical_ids[instance]);
> > > > + if (err)
> > > > + goto cleanup;
> > > > +
> > > > + mask |= BIT(i);
> > >
> > > I still this there is a less clunky way to set this up in less code and more
> > > readable at the same time. Like do it in two passes so you can iterate
> > > gt->engine_class[] array instead of having to implement a skip condition
> > > (both on class and HAS_ENGINE at two places) and also avoid walking the flat
> > > intel_engines array recursively.
> > >
> >
> > Kinda a bikeshed arguing about a pretty simple loop structure, don't you
> > think? I personally like the way it laid out.
> >
> > Pseudo code for your suggestion?
>
> Leave the existing setup loop as is and add an additional "for engine class"
> walk after it. That way you can walk already setup gt->engine_class[] array
> so wouldn't need to skip wrong classes and have HAS_ENGINE checks when
> walking the flat intel_engines[] array several times. It also applies to the
> helper which counts logical instances per class.
>
Ok, I think I see what you are getting at. Again IMO this is a total
bikeshed as this is 1 time setup step that we really should only care if
the loop works or not rather than it being optimized / looks a way a
certain person wants. I can change this if you really insist but again
IMO disucssing this is a total waste of energy.
> > > > + }
> > > > }
> > > > /*
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > index ed91bcff20eb..fddf35546b58 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > @@ -266,6 +266,11 @@ struct intel_engine_cs {
> > > > unsigned int guc_id;
> > > > intel_engine_mask_t mask;
> > > > + /**
> > > > + * @logical_mask: logical mask of engine, reported to user space via
> > > > + * query IOCTL and used to communicate with the GuC in logical space
> > > > + */
> > > > + intel_engine_mask_t logical_mask;
> > >
> > > You could prefix the new field with uabi_ to match the existing scheme and
> > > to signify to anyone who might be touching it in the future it should not be
> > > changed.
> >
> > This is kinda uabi, but also kinda isn't. We do report a logical
> > instance via IOCTL but it also really is tied the GuC backend as we only
> > can communicate with the GuC in logical space. IMO we should leave as
> > is.
>
> Perhaps it would be best to call the new field uabi_logical_instance so it's
> clear it is reported in the query directly and do the BIT() transformation
> in the GuC backend?
>
Virtual engines can have a multiple bits set in this mask, so this is
used for both the query on physical engines via UABI and submission in
the GuC backend.
> >
> > >
> > > Also, I think comment should explain what is logical space ie. how the
> > > numbering works.
> > >
> >
> > Don't I already do that? I suppose I could add something like:
>
> Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new query)
> or here.
>
/**
* @logical_mask: logical mask of engine, reported to user space via
* query IOCTL and used to communicate with the GuC in logical space
*/
> >
> > The logical mask within engine class must be contiguous across all
> > instances.
>
> Best not to start mentioning the mask for the first time. Just explain what
> logical numbering is in terms of how engines are enumerated in order of
> physical instances but skipping the fused off ones. In the kerneldoc for the
> new query is I think the right place.
>
Maybe I can add:
The logical mapping is defined on per part basis in the bspec and can
very based the parts fusing.
> > > Not sure the part about GuC needs to be in the comment since uapi is
> > > supposed to be backend agnostic.
> > >
> >
> > Communicating with the GuC in logical space is a pretty key point here.
> > The communication with the GuC in logical space is backend specific but
> > how our hardware works (e.g. split frame workloads must be placed
> > logical contiguous) is not. Mentioning the GuC requirement here makes
> > sense to me for completeness.
>
> Yeah might be, I was thinking more about the new query. Query definitely is
> backend agnostic but yes it is fine to say in the comment here the new field
> is used both for the query and for communicating with GuC.
>
Sounds good, will make it clear it used for the query and from
communicating with the GuC.
Matt
> Regards,
>
> Tvrtko
>
> >
> > Matt
> >
> > > Regards,
> > >
> > > Tvrtko
> > >
> > > > u8 class;
> > > > u8 instance;
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > index cafb0608ffb4..813a6de01382 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > > ve->siblings[ve->num_siblings++] = sibling;
> > > > ve->base.mask |= sibling->mask;
> > > > + ve->base.logical_mask |= sibling->logical_mask;
> > > > /*
> > > > * All physical engines must be compatible for their emission
> > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > index 6926919bcac6..9f5f43a16182 100644
> > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
> > > > for_each_engine(engine, gt, id) {
> > > > u8 guc_class = engine_class_to_guc_class(engine->class);
> > > > - system_info->mapping_table[guc_class][engine->instance] =
> > > > + system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
> > > > engine->instance;
> > > > }
> > > > }
> > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > index e0eed70f9b92..ffafbac7335e 100644
> > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
> > > > return __guc_action_deregister_context(guc, guc_id, loop);
> > > > }
> > > > -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> > > > -{
> > > > - switch (class) {
> > > > - case RENDER_CLASS:
> > > > - return mask >> RCS0;
> > > > - case VIDEO_ENHANCEMENT_CLASS:
> > > > - return mask >> VECS0;
> > > > - case VIDEO_DECODE_CLASS:
> > > > - return mask >> VCS0;
> > > > - case COPY_ENGINE_CLASS:
> > > > - return mask >> BCS0;
> > > > - default:
> > > > - MISSING_CASE(class);
> > > > - return 0;
> > > > - }
> > > > -}
> > > > -
> > > > static void guc_context_policy_init(struct intel_engine_cs *engine,
> > > > struct guc_lrc_desc *desc)
> > > > {
> > > > @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
> > > > desc = __get_lrc_desc(guc, desc_idx);
> > > > desc->engine_class = engine_class_to_guc_class(engine->class);
> > > > - desc->engine_submit_mask = adjust_engine_mask(engine->class,
> > > > - engine->mask);
> > > > + desc->engine_submit_mask = engine->logical_mask;
> > > > desc->hw_context_desc = ce->lrc.lrca;
> > > > desc->priority = ce->guc_state.prio;
> > > > desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> > > > @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > > }
> > > > ve->base.mask |= sibling->mask;
> > > > + ve->base.logical_mask |= sibling->logical_mask;
> > > > if (n != 0 && ve->base.class != sibling->class) {
> > > > DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> > > >
More information about the Intel-gfx
mailing list