[Intel-gfx] [PATCH] drm/i915/gt: Split logical ring contexts from execlist submission

Mon Dec 14 22:29:46 UTC 2020

Quoting Daniele Ceraolo Spurio (2020-12-14 22:11:01)
> <snip>
> 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > new file mode 100644
> > index 000000000000..3d3e408a87a9
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > @@ -0,0 +1,114 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_LRC_H__
> > +#define __INTEL_LRC_H__
> > +
> > +#include <linux/types.h>
> > +
> > +#include "intel_context.h"
> > +#include "intel_lrc_reg.h"
> > +
> > +struct drm_i915_gem_object;
> > +struct intel_engine_cs;
> > +struct intel_ring;
> > +
> > +/* At the start of the context image is its per-process HWS page */
> > +#define LRC_PPHWSP_PN        (0)
> > +#define LRC_PPHWSP_SZ        (1)
> > +/* After the PPHWSP we have the logical state for the context */
> > +#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
> > +#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
> > +
> > +/* Space within PPHWSP reserved to be used as scratch */
> > +#define LRC_PPHWSP_SCRATCH           0x34
> > +#define LRC_PPHWSP_SCRATCH_ADDR              (LRC_PPHWSP_SCRATCH * sizeof(u32))
> > +
> > +int lrc_init_wa_ctx(struct intel_engine_cs *engine);
> > +void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
> > +
> > +int lrc_alloc(struct intel_context *ce,
> > +           struct intel_engine_cs *engine);
> > +void lrc_reset(struct intel_context *ce,
> > +            struct intel_engine_cs *engine,
> > +            u32 head,
> > +            bool scrub);
> > +
> > +void lrc_init_regs(const struct intel_context *ce,
> > +                const struct intel_engine_cs *engine,
> > +                const struct intel_ring *ring,
> > +                bool close);
> > +void lrc_update_regs(const struct intel_context *ce,
> > +                  const struct intel_engine_cs *engine,
> > +                  u32 head);
> > +void lrc_reset_regs(const struct intel_context *ce,
> > +                 const struct intel_engine_cs *engine);
> > +
> > +void lrc_restore_defaults(struct intel_context *ce,
> > +                       struct intel_engine_cs *engine);
> > +
> > +void lrc_update_offsets(struct intel_context *ce,
> > +                     struct intel_engine_cs *engine);
> > +
> > +void lrc_check_regs(const struct intel_context *ce,
> > +                 const struct intel_engine_cs *engine,
> > +                 const char *when);
> > +void lrc_check_redzone(struct intel_context *ce);
> > +
> > +static inline u32 lrc_get_runtime(const struct intel_context *ce)
> > +{
> > +     /*
> > +      * We can use either ppHWSP[16] which is recorded before the context
> > +      * switch (and so excludes the cost of context switches) or use the
> > +      * value from the context image itself, which is saved/restored earlier
> > +      * and so includes the cost of the save.
> > +      */
> > +     return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
> > +}
> > +
> > +/*
> > + * The context descriptor encodes various attributes of a context,
> > + * including its GTT address and some flags. Because it's fairly
> > + * expensive to calculate, we'll just do it once and cache the result,
> > + * which remains valid until the context is unpinned.
> > + *
> > + * This is what a descriptor looks like, from LSB to MSB::
> > + *
> > + *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
> > + *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
> > + *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
> > + *      bits 53-54:    mbz, reserved for use by hardware
> > + *      bits 55-63:    group ID, currently unused and set to 0
> > + *
> > + * Starting from Gen11, the upper dword of the descriptor has a new format:
> > + *
> > + *      bits 32-36:    reserved
> > + *      bits 37-47:    SW context ID
> > + *      bits 48:53:    engine instance
> > + *      bit 54:        mbz, reserved for use by hardware
> > + *      bits 55-60:    SW counter
> > + *      bits 61-63:    engine class
> > + *
> > + * engine info, SW context ID and SW counter need to form a unique number
> > + * (Context ID) per lrc.
> > + */
> > +static inline u32
> > +lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
> > +{
> > +     u32 desc;
> > +
> > +     desc = INTEL_LEGACY_32B_CONTEXT;
> > +     if (i915_vm_is_4lvl(ce->vm))
> > +             desc = INTEL_LEGACY_64B_CONTEXT;
> > +     desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
> > +
> > +     desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
> > +     if (IS_GEN(engine->i915, 8))
> > +             desc |= GEN8_CTX_L3LLC_COHERENT;
> > +
> > +     return i915_ggtt_offset(ce->state) | desc;
> > +}
> Personal preference: I'd avoid having this as a static inline to not 
> have a direct dependency to i915_drv.h. Maybe we can split this in 2 
> parts, an init part where we set all the flags at context alloc time and 
> an update part where we rmw the address in, and only inline the latter?

The inline is on the overkill side, we only update the descriptor flags
on repinning the context.

Hmm, it might be nice to return the flags from lrc_update_regs().

> +static struct i915_vma *create_scratch(struct intel_gt *gt)
> 
> There is already several copies of this create_scratch() in the 
> selftests code (execlists, mocs and now lrc). Do we now have enough 
> usages to move it to a common file? Can be done as a follow up.

If some one notices me copy'n'pasting routines, then yes.
3's the arbitrary threshold for refactoring to a common helper.
-Chris