[PATCH 26/47] drm/i915/guc: GuC virtual engines
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Thu Jul 15 01:21:24 UTC 2021
On 6/24/2021 12:04 AM, Matthew Brost wrote:
> Implement GuC virtual engines. Rather simple implementation, basically
> just allocate an engine, setup context enter / exit function to virtual
> engine specific functions, set all other variables / functions to guc
> versions, and set the engine mask to that of all the siblings.
>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_context.c | 19 +-
> drivers/gpu/drm/i915/gem/i915_gem_context.h | 1 +
> drivers/gpu/drm/i915/gt/intel_context_types.h | 10 +
> drivers/gpu/drm/i915/gt/intel_engine.h | 45 +++-
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 +
> .../drm/i915/gt/intel_execlists_submission.c | 186 +++++++------
> .../drm/i915/gt/intel_execlists_submission.h | 11 -
> drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 +-
> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 253 +++++++++++++++++-
> .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 2 +
> 10 files changed, 429 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 5c07e6abf16a..8a9293e0ca92 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -72,7 +72,6 @@
> #include "gt/intel_context_param.h"
> #include "gt/intel_engine_heartbeat.h"
> #include "gt/intel_engine_user.h"
> -#include "gt/intel_execlists_submission.h" /* virtual_engine */
> #include "gt/intel_gpu_commands.h"
> #include "gt/intel_ring.h"
>
> @@ -1568,9 +1567,6 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> if (!HAS_EXECLISTS(i915))
> return -ENODEV;
>
> - if (intel_uc_uses_guc_submission(&i915->gt.uc))
> - return -ENODEV; /* not implement yet */
> -
> if (get_user(idx, &ext->engine_index))
> return -EFAULT;
>
> @@ -1627,7 +1623,7 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> }
> }
>
> - ce = intel_execlists_create_virtual(siblings, n);
> + ce = intel_engine_create_virtual(siblings, n);
> if (IS_ERR(ce)) {
> err = PTR_ERR(ce);
> goto out_siblings;
> @@ -1723,13 +1719,9 @@ set_engines__bond(struct i915_user_extension __user *base, void *data)
> * A non-virtual engine has no siblings to choose between; and
> * a submit fence will always be directed to the one engine.
> */
> - if (intel_engine_is_virtual(virtual)) {
> - err = intel_virtual_engine_attach_bond(virtual,
> - master,
> - bond);
> - if (err)
> - return err;
> - }
> + err = intel_engine_attach_bond(virtual, master, bond);
> + if (err)
> + return err;
> }
>
> return 0;
> @@ -2116,8 +2108,7 @@ static int clone_engines(struct i915_gem_context *dst,
> * the virtual engine instead.
> */
> if (intel_engine_is_virtual(engine))
> - clone->engines[n] =
> - intel_execlists_clone_virtual(engine);
> + clone->engines[n] = intel_engine_clone_virtual(engine);
> else
> clone->engines[n] = intel_context_create(engine);
> if (IS_ERR_OR_NULL(clone->engines[n])) {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index b5c908f3f4f2..ba772762f7b9 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -10,6 +10,7 @@
> #include "i915_gem_context_types.h"
>
> #include "gt/intel_context.h"
> +#include "gt/intel_engine.h"
>
> #include "i915_drv.h"
> #include "i915_gem.h"
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index e7af6a2368f8..6945963a31ba 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -47,6 +47,16 @@ struct intel_context_ops {
>
> void (*reset)(struct intel_context *ce);
> void (*destroy)(struct kref *kref);
> +
> + /* virtual engine/context interface */
> + struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
> + unsigned int count);
> + struct intel_context *(*clone_virtual)(struct intel_engine_cs *engine);
> + struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
> + unsigned int sibling);
> + int (*attach_bond)(struct intel_engine_cs *engine,
> + const struct intel_engine_cs *master,
> + const struct intel_engine_cs *sibling);
Cloning and bonding for VE have been removed, so the corresponding hooks
can be dropped. I'll skip reviewing all the related code in this patch.
> };
>
> struct intel_context {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index f911c1224ab2..923eaee627b3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -273,13 +273,56 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
> return intel_engine_has_preemption(engine);
> }
>
> +struct intel_context *
> +intel_engine_create_virtual(struct intel_engine_cs **siblings,
> + unsigned int count);
> +
> +static inline bool
> +intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
> +{
> + if (intel_engine_uses_guc(engine))
> + return intel_guc_virtual_engine_has_heartbeat(engine);
> + else
> + GEM_BUG_ON("Only should be called in GuC submission");
> +
> + return false;
> +}
This could use a better explanation. Maybe something like:
static inline bool
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
{
/*
* For non-GuC submission we expect the back-end to look at the
* heartbeat status of the actual physical engine that the work
* has been (or is being) scheduled on, so we should only reach
* here with GuC submission enabled.
*/
GEM_BUG_ON(!intel_engine_uses_guc(engine));
return intel_guc_virtual_engine_has_heartbeat(engine);
}
> +
> static inline bool
> intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
> {
> if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
> return false;
>
> - return READ_ONCE(engine->props.heartbeat_interval_ms);
> + if (intel_engine_is_virtual(engine))
> + return intel_virtual_engine_has_heartbeat(engine);
> + else
> + return READ_ONCE(engine->props.heartbeat_interval_ms);
> +}
> +
> +static inline struct intel_context *
> +intel_engine_clone_virtual(struct intel_engine_cs *src)
> +{
> + GEM_BUG_ON(!intel_engine_is_virtual(src));
> + return src->cops->clone_virtual(src);
> +}
> +
> +static inline int
> +intel_engine_attach_bond(struct intel_engine_cs *engine,
> + const struct intel_engine_cs *master,
> + const struct intel_engine_cs *sibling)
> +{
> + if (!engine->cops->attach_bond)
> + return 0;
> +
> + return engine->cops->attach_bond(engine, master, sibling);
> +}
> +
> +static inline struct intel_engine_cs *
> +intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
> +{
> + GEM_BUG_ON(!intel_engine_is_virtual(engine));
> + return engine->cops->get_sibling(engine, sibling);
> }
>
> #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 88694822716a..d13b1716c29e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1736,6 +1736,20 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
> return total;
> }
>
> +struct intel_context *
> +intel_engine_create_virtual(struct intel_engine_cs **siblings,
> + unsigned int count)
> +{
> + if (count == 0)
> + return ERR_PTR(-EINVAL);
> +
> + if (count == 1)
> + return intel_context_create(siblings[0]);
> +
> + GEM_BUG_ON(!siblings[0]->cops->create_virtual);
> + return siblings[0]->cops->create_virtual(siblings, count);
> +}
> +
> static bool match_ring(struct i915_request *rq)
> {
> u32 ring = ENGINE_READ(rq->engine, RING_START);
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index cdb2126a159a..bd4ced794ff9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -205,6 +205,9 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> return container_of(engine, struct virtual_engine, base);
> }
>
> +static struct intel_context *
> +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
> +
> static struct i915_request *
> __active_request(const struct intel_timeline * const tl,
> struct i915_request *rq,
> @@ -2560,6 +2563,8 @@ static const struct intel_context_ops execlists_context_ops = {
>
> .reset = lrc_reset,
> .destroy = lrc_destroy,
> +
> + .create_virtual = execlists_create_virtual,
> };
>
> static int emit_pdps(struct i915_request *rq)
> @@ -3506,6 +3511,94 @@ static void virtual_context_exit(struct intel_context *ce)
> intel_engine_pm_put(ve->siblings[n]);
> }
>
> +static struct intel_engine_cs *
> +virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
> +{
> + struct virtual_engine *ve = to_virtual_engine(engine);
> +
> + if (sibling >= ve->num_siblings)
> + return NULL;
> +
> + return ve->siblings[sibling];
> +}
> +
> +static struct intel_context *
> +virtual_clone(struct intel_engine_cs *src)
> +{
> + struct virtual_engine *se = to_virtual_engine(src);
> + struct intel_context *dst;
> +
> + dst = execlists_create_virtual(se->siblings, se->num_siblings);
> + if (IS_ERR(dst))
> + return dst;
> +
> + if (se->num_bonds) {
> + struct virtual_engine *de = to_virtual_engine(dst->engine);
> +
> + de->bonds = kmemdup(se->bonds,
> + sizeof(*se->bonds) * se->num_bonds,
> + GFP_KERNEL);
> + if (!de->bonds) {
> + intel_context_put(dst);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + de->num_bonds = se->num_bonds;
> + }
> +
> + return dst;
> +}
> +
> +static struct ve_bond *
> +virtual_find_bond(struct virtual_engine *ve,
> + const struct intel_engine_cs *master)
> +{
> + int i;
> +
> + for (i = 0; i < ve->num_bonds; i++) {
> + if (ve->bonds[i].master == master)
> + return &ve->bonds[i];
> + }
> +
> + return NULL;
> +}
> +
> +static int virtual_attach_bond(struct intel_engine_cs *engine,
> + const struct intel_engine_cs *master,
> + const struct intel_engine_cs *sibling)
> +{
> + struct virtual_engine *ve = to_virtual_engine(engine);
> + struct ve_bond *bond;
> + int n;
> +
> + /* Sanity check the sibling is part of the virtual engine */
> + for (n = 0; n < ve->num_siblings; n++)
> + if (sibling == ve->siblings[n])
> + break;
> + if (n == ve->num_siblings)
> + return -EINVAL;
> +
> + bond = virtual_find_bond(ve, master);
> + if (bond) {
> + bond->sibling_mask |= sibling->mask;
> + return 0;
> + }
> +
> + bond = krealloc(ve->bonds,
> + sizeof(*bond) * (ve->num_bonds + 1),
> + GFP_KERNEL);
> + if (!bond)
> + return -ENOMEM;
> +
> + bond[ve->num_bonds].master = master;
> + bond[ve->num_bonds].sibling_mask = sibling->mask;
> +
> + ve->bonds = bond;
> + ve->num_bonds++;
> +
> + return 0;
> +}
> +
> static const struct intel_context_ops virtual_context_ops = {
> .flags = COPS_HAS_INFLIGHT,
>
> @@ -3520,6 +3613,10 @@ static const struct intel_context_ops virtual_context_ops = {
> .exit = virtual_context_exit,
>
> .destroy = virtual_context_destroy,
> +
> + .clone_virtual = virtual_clone,
> + .get_sibling = virtual_get_sibling,
> + .attach_bond = virtual_attach_bond,
> };
>
> static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> @@ -3668,20 +3765,6 @@ static void virtual_submit_request(struct i915_request *rq)
> spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
> }
>
> -static struct ve_bond *
> -virtual_find_bond(struct virtual_engine *ve,
> - const struct intel_engine_cs *master)
> -{
> - int i;
> -
> - for (i = 0; i < ve->num_bonds; i++) {
> - if (ve->bonds[i].master == master)
> - return &ve->bonds[i];
> - }
> -
> - return NULL;
> -}
> -
> static void
> virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> {
> @@ -3704,20 +3787,13 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> to_request(signal)->execution_mask &= ~allowed;
> }
>
> -struct intel_context *
> -intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> - unsigned int count)
> +static struct intel_context *
> +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> {
> struct virtual_engine *ve;
> unsigned int n;
> int err;
>
> - if (count == 0)
> - return ERR_PTR(-EINVAL);
> -
> - if (count == 1)
> - return intel_context_create(siblings[0]);
> -
> ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
> if (!ve)
> return ERR_PTR(-ENOMEM);
> @@ -3850,70 +3926,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> return ERR_PTR(err);
> }
>
> -struct intel_context *
> -intel_execlists_clone_virtual(struct intel_engine_cs *src)
> -{
> - struct virtual_engine *se = to_virtual_engine(src);
> - struct intel_context *dst;
> -
> - dst = intel_execlists_create_virtual(se->siblings,
> - se->num_siblings);
> - if (IS_ERR(dst))
> - return dst;
> -
> - if (se->num_bonds) {
> - struct virtual_engine *de = to_virtual_engine(dst->engine);
> -
> - de->bonds = kmemdup(se->bonds,
> - sizeof(*se->bonds) * se->num_bonds,
> - GFP_KERNEL);
> - if (!de->bonds) {
> - intel_context_put(dst);
> - return ERR_PTR(-ENOMEM);
> - }
> -
> - de->num_bonds = se->num_bonds;
> - }
> -
> - return dst;
> -}
> -
> -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> - const struct intel_engine_cs *master,
> - const struct intel_engine_cs *sibling)
> -{
> - struct virtual_engine *ve = to_virtual_engine(engine);
> - struct ve_bond *bond;
> - int n;
> -
> - /* Sanity check the sibling is part of the virtual engine */
> - for (n = 0; n < ve->num_siblings; n++)
> - if (sibling == ve->siblings[n])
> - break;
> - if (n == ve->num_siblings)
> - return -EINVAL;
> -
> - bond = virtual_find_bond(ve, master);
> - if (bond) {
> - bond->sibling_mask |= sibling->mask;
> - return 0;
> - }
> -
> - bond = krealloc(ve->bonds,
> - sizeof(*bond) * (ve->num_bonds + 1),
> - GFP_KERNEL);
> - if (!bond)
> - return -ENOMEM;
> -
> - bond[ve->num_bonds].master = master;
> - bond[ve->num_bonds].sibling_mask = sibling->mask;
> -
> - ve->bonds = bond;
> - ve->num_bonds++;
> -
> - return 0;
> -}
> -
> void intel_execlists_show_requests(struct intel_engine_cs *engine,
> struct drm_printer *m,
> void (*show_request)(struct drm_printer *m,
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> index 4ca9b475e252..74041b1994af 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> @@ -32,15 +32,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
> int indent),
> unsigned int max);
>
> -struct intel_context *
> -intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> - unsigned int count);
> -
> -struct intel_context *
> -intel_execlists_clone_virtual(struct intel_engine_cs *src);
> -
> -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> - const struct intel_engine_cs *master,
> - const struct intel_engine_cs *sibling);
> -
> #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index 08896ae027d5..88aac9977e09 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -3727,7 +3727,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
> GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
>
> for (n = 0; n < nctx; n++) {
> - ve[n] = intel_execlists_create_virtual(siblings, nsibling);
> + ve[n] = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ve[n])) {
> err = PTR_ERR(ve[n]);
> nctx = n;
> @@ -3923,7 +3923,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
> * restrict it to our desired engine within the virtual engine.
> */
>
> - ve = intel_execlists_create_virtual(siblings, nsibling);
> + ve = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ve)) {
> err = PTR_ERR(ve);
> goto out_close;
> @@ -4054,7 +4054,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
> i915_request_add(rq);
> }
>
> - ce = intel_execlists_create_virtual(siblings, nsibling);
> + ce = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ce)) {
> err = PTR_ERR(ce);
> goto out;
> @@ -4106,7 +4106,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
>
> /* XXX We do not handle oversubscription and fairness with normal rq */
> for (n = 0; n < nsibling; n++) {
> - ce = intel_execlists_create_virtual(siblings, nsibling);
> + ce = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ce)) {
> err = PTR_ERR(ce);
> goto out;
> @@ -4208,7 +4208,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
> if (err)
> goto out_scratch;
>
> - ve = intel_execlists_create_virtual(siblings, nsibling);
> + ve = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ve)) {
> err = PTR_ERR(ve);
> goto out_scratch;
> @@ -4431,16 +4431,16 @@ static int bond_virtual_engine(struct intel_gt *gt,
> for (n = 0; n < nsibling; n++) {
> struct intel_context *ve;
>
> - ve = intel_execlists_create_virtual(siblings, nsibling);
> + ve = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ve)) {
> err = PTR_ERR(ve);
> onstack_fence_fini(&fence);
> goto out;
> }
>
> - err = intel_virtual_engine_attach_bond(ve->engine,
> - master,
> - siblings[n]);
> + err = intel_engine_attach_bond(ve->engine,
> + master,
> + siblings[n]);
> if (err) {
> intel_context_put(ve);
> onstack_fence_fini(&fence);
> @@ -4576,7 +4576,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
> if (igt_spinner_init(&spin, gt))
> return -ENOMEM;
>
> - ve = intel_execlists_create_virtual(siblings, nsibling);
> + ve = intel_engine_create_virtual(siblings, nsibling);
> if (IS_ERR(ve)) {
> err = PTR_ERR(ve);
> goto out_spin;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index d605af0d66e6..ccbcf024b31b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -60,6 +60,15 @@
> *
> */
>
> +/* GuC Virtual Engine */
> +struct guc_virtual_engine {
> + struct intel_engine_cs base;
> + struct intel_context context;
> +};
> +
> +static struct intel_context *
> +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
> +
> #define GUC_REQUEST_SIZE 64 /* bytes */
>
> /*
> @@ -928,20 +937,35 @@ static int guc_lrc_desc_pin(struct intel_context *ce)
> return ret;
> }
>
> -static int guc_context_pre_pin(struct intel_context *ce,
> - struct i915_gem_ww_ctx *ww,
> - void **vaddr)
> +static int __guc_context_pre_pin(struct intel_context *ce,
> + struct intel_engine_cs *engine,
> + struct i915_gem_ww_ctx *ww,
> + void **vaddr)
> {
> - return lrc_pre_pin(ce, ce->engine, ww, vaddr);
> + return lrc_pre_pin(ce, engine, ww, vaddr);
> }
>
> -static int guc_context_pin(struct intel_context *ce, void *vaddr)
> +static int __guc_context_pin(struct intel_context *ce,
> + struct intel_engine_cs *engine,
> + void *vaddr)
> {
> if (i915_ggtt_offset(ce->state) !=
> (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
> set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>
> - return lrc_pin(ce, ce->engine, vaddr);
> + return lrc_pin(ce, engine, vaddr);
> +}
> +
> +static int guc_context_pre_pin(struct intel_context *ce,
> + struct i915_gem_ww_ctx *ww,
> + void **vaddr)
> +{
> + return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
> +}
> +
> +static int guc_context_pin(struct intel_context *ce, void *vaddr)
> +{
> + return __guc_context_pin(ce, ce->engine, vaddr);
> }
>
> static void guc_context_unpin(struct intel_context *ce)
> @@ -1041,6 +1065,21 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> deregister_context(ce, ce->guc_id);
> }
>
> +static void __guc_context_destroy(struct intel_context *ce)
> +{
> + lrc_fini(ce);
> + intel_context_fini(ce);
> +
> + if (intel_engine_is_virtual(ce->engine)) {
> + struct guc_virtual_engine *ve =
> + container_of(ce, typeof(*ve), context);
> +
> + kfree(ve);
> + } else {
> + intel_context_free(ce);
> + }
> +}
> +
> static void guc_context_destroy(struct kref *kref)
> {
> struct intel_context *ce = container_of(kref, typeof(*ce), ref);
> @@ -1057,7 +1096,7 @@ static void guc_context_destroy(struct kref *kref)
> if (context_guc_id_invalid(ce) ||
> !lrc_desc_registered(guc, ce->guc_id)) {
> release_guc_id(guc, ce);
> - lrc_destroy(kref);
AFAICS after this patch we only have one use of lrc_destroy inside the
execlists file, while we do have two open-coded implementations (here and
execlists VE). Since lrc_fini and intel_context_fini are still always
called as a pair, maybe we can replace lrc_destroy with a function that
calls those two (i.e. basically just remove the free() from lrc_destroy)?
Can be done as a follow-up.
> + __guc_context_destroy(ce);
> return;
> }
>
> @@ -1073,7 +1112,7 @@ static void guc_context_destroy(struct kref *kref)
> if (context_guc_id_invalid(ce)) {
> __release_guc_id(guc, ce);
> spin_unlock_irqrestore(&guc->contexts_lock, flags);
> - lrc_destroy(kref);
> + __guc_context_destroy(ce);
> return;
> }
>
> @@ -1118,6 +1157,8 @@ static const struct intel_context_ops guc_context_ops = {
>
> .reset = lrc_reset,
> .destroy = guc_context_destroy,
> +
> + .create_virtual = guc_create_virtual,
> };
>
> static void __guc_signal_context_fence(struct intel_context *ce)
> @@ -1246,6 +1287,96 @@ static int guc_request_alloc(struct i915_request *rq)
> return 0;
> }
>
> +static struct intel_engine_cs *
> +guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
> +{
> + struct intel_engine_cs *engine;
> + intel_engine_mask_t tmp, mask = ve->mask;
> + unsigned int num_siblings = 0;
> +
> + for_each_engine_masked(engine, ve->gt, mask, tmp)
> + if (num_siblings++ == sibling)
> + return engine;
> +
> + return NULL;
> +}
> +
> +static int guc_virtual_context_pre_pin(struct intel_context *ce,
> + struct i915_gem_ww_ctx *ww,
> + void **vaddr)
> +{
> + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
> +
> + return __guc_context_pre_pin(ce, engine, ww, vaddr);
> +}
> +
> +static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
> +{
> + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
> +
> + return __guc_context_pin(ce, engine, vaddr);
> +}
> +
> +static void guc_virtual_context_enter(struct intel_context *ce)
> +{
> + intel_engine_mask_t tmp, mask = ce->engine->mask;
> + struct intel_engine_cs *engine;
> +
> + for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
> + intel_engine_pm_get(engine);
> +
> + intel_timeline_enter(ce->timeline);
> +}
> +
> +static void guc_virtual_context_exit(struct intel_context *ce)
> +{
> + intel_engine_mask_t tmp, mask = ce->engine->mask;
> + struct intel_engine_cs *engine;
> +
> + for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
> + intel_engine_pm_put(engine);
> +
> + intel_timeline_exit(ce->timeline);
> +}
> +
> +static int guc_virtual_context_alloc(struct intel_context *ce)
> +{
> + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
> +
> + return lrc_alloc(ce, engine);
> +}
> +
> +static struct intel_context *guc_clone_virtual(struct intel_engine_cs *src)
> +{
> + struct intel_engine_cs *siblings[GUC_MAX_INSTANCES_PER_CLASS], *engine;
> + intel_engine_mask_t tmp, mask = src->mask;
> + unsigned int num_siblings = 0;
> +
> + for_each_engine_masked(engine, src->gt, mask, tmp)
> + siblings[num_siblings++] = engine;
> +
> + return guc_create_virtual(siblings, num_siblings);
> +}
> +
> +static const struct intel_context_ops virtual_guc_context_ops = {
> + .alloc = guc_virtual_context_alloc,
> +
> + .pre_pin = guc_virtual_context_pre_pin,
> + .pin = guc_virtual_context_pin,
> + .unpin = guc_context_unpin,
> + .post_unpin = guc_context_post_unpin,
> +
> + .enter = guc_virtual_context_enter,
> + .exit = guc_virtual_context_exit,
> +
> + .sched_disable = guc_context_sched_disable,
> +
> + .destroy = guc_context_destroy,
> +
> + .clone_virtual = guc_clone_virtual,
> + .get_sibling = guc_virtual_get_sibling,
> +};
> +
> static void sanitize_hwsp(struct intel_engine_cs *engine)
> {
> struct intel_timeline *tl;
> @@ -1557,7 +1688,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> } else if (context_destroyed(ce)) {
> /* Context has been destroyed */
> release_guc_id(guc, ce);
> - lrc_destroy(&ce->ref);
> + __guc_context_destroy(ce);
> }
>
> decr_outstanding_submission_g2h(guc);
> @@ -1669,3 +1800,107 @@ void intel_guc_log_context_info(struct intel_guc *guc,
> atomic_read(&ce->guc_sched_state_no_lock));
> }
> }
> +
> +static struct intel_context *
> +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> +{
> + struct guc_virtual_engine *ve;
> + struct intel_guc *guc;
> + unsigned int n;
> + int err;
> +
> + ve = kzalloc(sizeof(*ve), GFP_KERNEL);
> + if (!ve)
> + return ERR_PTR(-ENOMEM);
> +
> + guc = &siblings[0]->gt->uc.guc;
> +
> + ve->base.i915 = siblings[0]->i915;
> + ve->base.gt = siblings[0]->gt;
> + ve->base.uncore = siblings[0]->uncore;
> + ve->base.id = -1;
> +
> + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> + ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> + ve->base.saturated = ALL_ENGINES;
Most of these settings are the same for both execlists and GuC and
aren't back-end dependent. Maybe we can have a:
intel_virtual_engine_init_early(struct intel_engine_cs *engine,
struct intel_engine_cs *sibling);
and call that from both places? Can be done as a follow-up.
> + ve->base.breadcrumbs = intel_breadcrumbs_create(&ve->base);
> + if (!ve->base.breadcrumbs) {
> + kfree(ve);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
> +
> + ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
> +
> + ve->base.cops = &virtual_guc_context_ops;
> + ve->base.request_alloc = guc_request_alloc;
> +
> + ve->base.submit_request = guc_submit_request;
> +
> + ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> +
> + intel_context_init(&ve->context, &ve->base);
> +
> + for (n = 0; n < count; n++) {
> + struct intel_engine_cs *sibling = siblings[n];
> +
> + GEM_BUG_ON(!is_power_of_2(sibling->mask));
> + if (sibling->mask & ve->base.mask) {
> + DRM_DEBUG("duplicate %s entry in load balancer\n",
> + sibling->name);
> + err = -EINVAL;
> + goto err_put;
> + }
> +
> + ve->base.mask |= sibling->mask;
> +
> + if (n != 0 && ve->base.class != sibling->class) {
> + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> + sibling->class, ve->base.class);
> + err = -EINVAL;
> + goto err_put;
> + } else if (n == 0) {
> + ve->base.class = sibling->class;
> + ve->base.uabi_class = sibling->uabi_class;
> + snprintf(ve->base.name, sizeof(ve->base.name),
> + "v%dx%d", ve->base.class, count);
> + ve->base.context_size = sibling->context_size;
> +
> + ve->base.emit_bb_start = sibling->emit_bb_start;
> + ve->base.emit_flush = sibling->emit_flush;
> + ve->base.emit_init_breadcrumb =
> + sibling->emit_init_breadcrumb;
> + ve->base.emit_fini_breadcrumb =
> + sibling->emit_fini_breadcrumb;
> + ve->base.emit_fini_breadcrumb_dw =
> + sibling->emit_fini_breadcrumb_dw;
> +
> + ve->base.flags |= sibling->flags;
> +
Same here, most of these settings from the sibling are the same.
Maybe an intel_virtual_engine_inherit_from_sibling() helper?
Apart from the various nits the code LGTM, but I'll wait until the next
spin for an r-b since a good chunk of the patch is going away.
Daniele
> + ve->base.props.timeslice_duration_ms =
> + sibling->props.timeslice_duration_ms;
> + }
> + }
> +
> + return &ve->context;
> +
> +err_put:
> + intel_context_put(&ve->context);
> + return ERR_PTR(err);
> +}
> +
> +
> +
> +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
> +{
> + struct intel_engine_cs *engine;
> + intel_engine_mask_t tmp, mask = ve->mask;
> +
> + for_each_engine_masked(engine, ve->gt, mask, tmp)
> + if (READ_ONCE(engine->props.heartbeat_interval_ms))
> + return true;
> +
> + return false;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> index 6453e2bfa151..95df5ab06031 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
> @@ -25,6 +25,8 @@ void intel_guc_log_submission_info(struct intel_guc *guc,
> struct drm_printer *p);
> void intel_guc_log_context_info(struct intel_guc *guc, struct drm_printer *p);
>
> +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
> +
> static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
> {
> /* XXX: GuC submission is unavailable for now */
More information about the dri-devel
mailing list