[Intel-gfx] [PATCH 20/27] drm/i915: Remove logical HW ID
Matthew Brost
matthew.brost at intel.com
Wed Sep 25 18:23:47 UTC 2019
On Wed, Sep 25, 2019 at 10:59:32AM -0700, Daniele Ceraolo Spurio wrote:
>+ Matt
>
I've reviewed this patch; it touches some of the work I'm doing on the new GuC
interface, for which I have two patches that rework the HW ID assignment.

The first patch moves ownership of the HW ID from i915_gem_context to
intel_context, as the new GuC interface has a 1:1 mapping between HW IDs and
LRCs. The second patch changes the allocation algorithm to use a bitmap rather
than an ida, as contiguous HW IDs are required for multi-LRC to work with the
new GuC; a rough sketch of that allocation scheme is below.
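
Purely illustrative, not from the actual series -- names such as guc_id_bitmap,
GUC_MAX_CONTEXT_IDS and the helpers are placeholders:

#include <linux/bitmap.h>
#include <linux/spinlock.h>

#define GUC_MAX_CONTEXT_IDS	65536	/* placeholder size */

static DECLARE_BITMAP(guc_id_bitmap, GUC_MAX_CONTEXT_IDS);
static DEFINE_SPINLOCK(guc_id_lock);

/* Claim 'count' consecutive IDs so all LRCs of a multi-LRC context are adjacent. */
static int alloc_contiguous_hw_ids(unsigned int count)
{
	unsigned long id;

	spin_lock(&guc_id_lock);
	id = bitmap_find_next_zero_area(guc_id_bitmap, GUC_MAX_CONTEXT_IDS,
					0, count, 0);
	if (id >= GUC_MAX_CONTEXT_IDS) {
		spin_unlock(&guc_id_lock);
		return -ENOSPC;
	}
	bitmap_set(guc_id_bitmap, id, count);
	spin_unlock(&guc_id_lock);

	return id;
}

static void release_contiguous_hw_ids(unsigned int id, unsigned int count)
{
	spin_lock(&guc_id_lock);
	bitmap_clear(guc_id_bitmap, id, count);
	spin_unlock(&guc_id_lock);
}
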
All of that said, I don't think anything in this patch will badly break what I
have for the new GuC. At most it will mean a minor refactor once the new GuC
patches are rebased on top of this series.
-Matt
>On 9/25/19 5:41 AM, Tvrtko Ursulin wrote:
>>[+ Daniele, I think he might want to have a look at this.]
>>
>>On 25/09/2019 11:01, Chris Wilson wrote:
>>>With the introduction of ctx->engines[] we allow multiple logical
>>>contexts to be used on the same engine (e.g. with virtual engines). Each
>>>logical context requires a unique tag in order for context-switching to
>>>occur correctly between them.
>>>
>>>We only need to keep a unique tag for the active lifetime of the
>>>context, and for as long as we need to identify that context. The HW
>>>uses the tag to determine if it should use a lite-restore (why not the
>>>LRCA?) and passes the tag back for various status identifiers. The only
>>>status we need to track is for OA, so when using perf, we assign the
>>>specific context a unique tag.
>>>
>>>Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set
>>>of engines")
>>
>>Fixes? Why?
>>
>>>Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>>---
>>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 147 ------------------
>>> drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 --
>>> .../gpu/drm/i915/gem/i915_gem_context_types.h | 18 ---
>>> .../drm/i915/gem/selftests/i915_gem_context.c | 13 +-
>>> .../gpu/drm/i915/gem/selftests/mock_context.c | 8 -
>>> drivers/gpu/drm/i915/gt/intel_context_types.h | 1 +
>>> drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +-
>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 29 ++--
>>> drivers/gpu/drm/i915/gvt/kvmgt.c | 17 --
>>> drivers/gpu/drm/i915/i915_debugfs.c | 3 -
>>> drivers/gpu/drm/i915/i915_gpu_error.c | 7 +-
>>> drivers/gpu/drm/i915/i915_gpu_error.h | 1 -
>>> drivers/gpu/drm/i915/i915_perf.c | 25 ++-
>>> drivers/gpu/drm/i915/i915_trace.h | 38 ++---
>>> .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
>>> drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +-
>>> 16 files changed, 48 insertions(+), 284 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>index e832238a72e5..4e59b809d901 100644
>>>--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>@@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
>>> return i915_gem_context_get_engine(ctx, idx);
>>> }
>>>-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
>>>-{
>>>- unsigned int max;
>>>-
>>>- lockdep_assert_held(&i915->contexts.mutex);
>>>-
>>>- if (INTEL_GEN(i915) >= 12)
>>>- max = GEN12_MAX_CONTEXT_HW_ID;
>>>- else if (INTEL_GEN(i915) >= 11)
>>>- max = GEN11_MAX_CONTEXT_HW_ID;
>>>- else if (USES_GUC_SUBMISSION(i915))
>>>- /*
>>>- * When using GuC in proxy submission, GuC consumes the
>>>- * highest bit in the context id to indicate proxy submission.
>>>- */
>>>- max = MAX_GUC_CONTEXT_HW_ID;
>>>- else
>>>- max = MAX_CONTEXT_HW_ID;
>>>-
>>>- return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
>>>-}
>>>-
>>>-static int steal_hw_id(struct drm_i915_private *i915)
>>>-{
>>>- struct i915_gem_context *ctx, *cn;
>>>- LIST_HEAD(pinned);
>>>- int id = -ENOSPC;
>>>-
>>>- lockdep_assert_held(&i915->contexts.mutex);
>>>-
>>>- list_for_each_entry_safe(ctx, cn,
>>>- &i915->contexts.hw_id_list, hw_id_link) {
>>>- if (atomic_read(&ctx->hw_id_pin_count)) {
>>>- list_move_tail(&ctx->hw_id_link, &pinned);
>>>- continue;
>>>- }
>>>-
>>>- GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
>>>- list_del_init(&ctx->hw_id_link);
>>>- id = ctx->hw_id;
>>>- break;
>>>- }
>>>-
>>>- /*
>>>- * Remember how far we got up on the last repossesion scan, so the
>>>- * list is kept in a "least recently scanned" order.
>>>- */
>>>- list_splice_tail(&pinned, &i915->contexts.hw_id_list);
>>>- return id;
>>>-}
>>>-
>>>-static int assign_hw_id(struct drm_i915_private *i915, unsigned
>>>int *out)
>>>-{
>>>- int ret;
>>>-
>>>- lockdep_assert_held(&i915->contexts.mutex);
>>>-
>>>- /*
>>>- * We prefer to steal/stall ourselves and our users over that of the
>>>- * entire system. That may be a little unfair to our users, and
>>>- * even hurt high priority clients. The choice is whether to oomkill
>>>- * something else, or steal a context id.
>>>- */
>>>- ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL |
>>>__GFP_NOWARN);
>>>- if (unlikely(ret < 0)) {
>>>- ret = steal_hw_id(i915);
>>>- if (ret < 0) /* once again for the correct errno code */
>>>- ret = new_hw_id(i915, GFP_KERNEL);
>>>- if (ret < 0)
>>>- return ret;
>>>- }
>>>-
>>>- *out = ret;
>>>- return 0;
>>>-}
>>>-
>>>-static void release_hw_id(struct i915_gem_context *ctx)
>>>-{
>>>- struct drm_i915_private *i915 = ctx->i915;
>>>-
>>>- if (list_empty(&ctx->hw_id_link))
>>>- return;
>>>-
>>>- mutex_lock(&i915->contexts.mutex);
>>>- if (!list_empty(&ctx->hw_id_link)) {
>>>- ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
>>>- list_del_init(&ctx->hw_id_link);
>>>- }
>>>- mutex_unlock(&i915->contexts.mutex);
>>>-}
>>>-
>>> static void __free_engines(struct i915_gem_engines *e, unsigned
>>>int count)
>>> {
>>> while (count--) {
>>>@@ -313,8 +222,6 @@ static void i915_gem_context_free(struct
>>>i915_gem_context *ctx)
>>> lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>>> GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>>>- release_hw_id(ctx);
>>>-
>>> free_engines(rcu_access_pointer(ctx->engines));
>>> mutex_destroy(&ctx->engines_mutex);
>>>@@ -459,12 +366,6 @@ static void context_close(struct
>>>i915_gem_context *ctx)
>>> ctx->file_priv = ERR_PTR(-EBADF);
>>>- /*
>>>- * This context will never again be assinged to HW, so we can
>>>- * reuse its ID for the next context.
>>>- */
>>>- release_hw_id(ctx);
>>>-
>>> /*
>>> * The LUT uses the VMA as a backpointer to unref the object,
>>> * so we need to clear the LUT before we close all the VMA (inside
>>>@@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
>>> RCU_INIT_POINTER(ctx->engines, e);
>>> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>>>- INIT_LIST_HEAD(&ctx->hw_id_link);
>>> /* NB: Mark all slices as needing a remap so that when the
>>>context first
>>> * loads it will restore whatever remap state already
>>>exists. If there
>>>@@ -676,18 +576,11 @@ struct i915_gem_context *
>>> i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>>> {
>>> struct i915_gem_context *ctx;
>>>- int err;
>>> ctx = i915_gem_create_context(i915, 0);
>>> if (IS_ERR(ctx))
>>> return ctx;
>>>- err = i915_gem_context_pin_hw_id(ctx);
>>>- if (err) {
>>>- destroy_kernel_context(&ctx);
>>>- return ERR_PTR(err);
>>>- }
>>>-
>>> i915_gem_context_clear_bannable(ctx);
>>> i915_gem_context_set_persistence(ctx);
>>> ctx->sched.priority = I915_USER_PRIORITY(prio);
>>>@@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct
>>>drm_i915_private *dev_priv)
>>> DRM_ERROR("Failed to create default global context\n");
>>> return PTR_ERR(ctx);
>>> }
>>>- /*
>>>- * For easy recognisablity, we want the kernel context to be
>>>0 and then
>>>- * all user contexts will have non-zero hw_id. Kernel contexts are
>>>- * permanently pinned, so that we never suffer a stall and can
>>>- * use them from any allocation context (e.g. for evicting other
>>>- * contexts and from inside the shrinker).
>>>- */
>>>- GEM_BUG_ON(ctx->hw_id);
>>>- GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
>>> dev_priv->kernel_context = ctx;
>>> DRM_DEBUG_DRIVER("%s context support initialized\n",
>>>@@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct
>>>drm_i915_private *i915)
>>> lockdep_assert_held(&i915->drm.struct_mutex);
>>> destroy_kernel_context(&i915->kernel_context);
>>>-
>>>- /* Must free all deferred contexts (via flush_workqueue) first */
>>>- GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
>>>- ida_destroy(&i915->contexts.hw_ida);
>>> }
>>> static int context_idr_cleanup(int id, void *p, void *data)
>>>@@ -2438,33 +2318,6 @@ int
>>>i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
>>> return ret;
>>> }
>>>-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
>>>-{
>>>- struct drm_i915_private *i915 = ctx->i915;
>>>- int err = 0;
>>>-
>>>- mutex_lock(&i915->contexts.mutex);
>>>-
>>>- GEM_BUG_ON(i915_gem_context_is_closed(ctx));
>>>-
>>>- if (list_empty(&ctx->hw_id_link)) {
>>>- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
>>>-
>>>- err = assign_hw_id(i915, &ctx->hw_id);
>>>- if (err)
>>>- goto out_unlock;
>>>-
>>>- list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
>>>- }
>>>-
>>>- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
>>>- atomic_inc(&ctx->hw_id_pin_count);
>>>-
>>>-out_unlock:
>>>- mutex_unlock(&i915->contexts.mutex);
>>>- return err;
>>>-}
>>>-
>>> /* GEM context-engines iterator: for_each_gem_engine() */
>>> struct intel_context *
>>> i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
>>>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h
>>>b/drivers/gpu/drm/i915/gem/i915_gem_context.h
>>>index e0f5b6c6a331..1b0df53436cf 100644
>>>--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
>>>+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
>>>@@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct
>>>i915_gem_context *ctx)
>>> clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
>>> }
>>>-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
>>>-static inline int i915_gem_context_pin_hw_id(struct
>>>i915_gem_context *ctx)
>>>-{
>>>- if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
>>>- return 0;
>>>-
>>>- return __i915_gem_context_pin_hw_id(ctx);
>>>-}
>>>-
>>>-static inline void i915_gem_context_unpin_hw_id(struct
>>>i915_gem_context *ctx)
>>>-{
>>>- GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
>>>- atomic_dec(&ctx->hw_id_pin_count);
>>>-}
>>>-
>>> static inline bool i915_gem_context_is_kernel(struct
>>>i915_gem_context *ctx)
>>> {
>>> return !ctx->file_priv;
>>>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>index daf1ea5075a6..6419da7c9f90 100644
>>>--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
>>>@@ -148,24 +148,6 @@ struct i915_gem_context {
>>> #define CONTEXT_FORCE_SINGLE_SUBMISSION 2
>>> #define CONTEXT_USER_ENGINES 3
>>>- /**
>>>- * @hw_id: - unique identifier for the context
>>>- *
>>>- * The hardware needs to uniquely identify the context for a few
>>>- * functions like fault reporting, PASID, scheduling. The
>>>- * &drm_i915_private.context_hw_ida is used to assign a unqiue
>>>- * id for the lifetime of the context.
>>>- *
>>>- * @hw_id_pin_count: - number of times this context had been pinned
>>>- * for use (should be, at most, once per engine).
>>>- *
>>>- * @hw_id_link: - all contexts with an assigned id are tracked
>>>- * for possible repossession.
>>>- */
>>>- unsigned int hw_id;
>>>- atomic_t hw_id_pin_count;
>>>- struct list_head hw_id_link;
>>>-
>>> struct mutex mutex;
>>> struct i915_sched_attr sched;
>>>diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>>b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>>index b39224ed3817..a8a9293f4ac6 100644
>>>--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>>+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>>@@ -446,9 +446,9 @@ static int igt_ctx_exec(void *arg)
>>> err = gpu_fill(ce, obj, dw);
>>> if (err) {
>>>- pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>>>+ pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) [full-ppgtt? %s], err=%d\n",
>>> ndwords, dw, max_dwords(obj),
>>>- engine->name, ctx->hw_id,
>>>+ engine->name,
>>> yesno(!!ctx->vm), err);
>>> intel_context_put(ce);
>>> kernel_context_close(ctx);
>>>@@ -584,9 +584,9 @@ static int igt_shared_ctx_exec(void *arg)
>>> err = gpu_fill(ce, obj, dw);
>>> if (err) {
>>>- pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>>>+ pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) [full-ppgtt? %s], err=%d\n",
>>> ndwords, dw, max_dwords(obj),
>>>- engine->name, ctx->hw_id,
>>>+ engine->name,
>>> yesno(!!ctx->vm), err);
>>> intel_context_put(ce);
>>> kernel_context_close(ctx);
>>>@@ -1168,10 +1168,9 @@ static int igt_ctx_readonly(void *arg)
>>> err = gpu_fill(ce, obj, dw);
>>> if (err) {
>>>- pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
>>>+ pr_err("Failed to fill dword %lu [%lu/%lu] with
>>>gpu (%s) [full-ppgtt? %s], err=%d\n",
>>> ndwords, dw, max_dwords(obj),
>>>- ce->engine->name, ctx->hw_id,
>>>- yesno(!!ctx->vm), err);
>>>+ ce->engine->name, yesno(!!ctx->vm), err);
>>> i915_gem_context_unlock_engines(ctx);
>>> goto out_unlock;
>>> }
>>>diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>>>b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>>>index 0d1a275ec316..ebc46f098561 100644
>>>--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>>>+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>>>@@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
>>> {
>>> struct i915_gem_context *ctx;
>>> struct i915_gem_engines *e;
>>>- int ret;
>>> ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>>> if (!ctx)
>>>@@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
>>> RCU_INIT_POINTER(ctx->engines, e);
>>> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>>>- INIT_LIST_HEAD(&ctx->hw_id_link);
>>> mutex_init(&ctx->mutex);
>>>- ret = i915_gem_context_pin_hw_id(ctx);
>>>- if (ret < 0)
>>>- goto err_engines;
>>>-
>>> if (name) {
>>> struct i915_ppgtt *ppgtt;
>>>@@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
>>> return ctx;
>>>-err_engines:
>>>- free_engines(rcu_access_pointer(ctx->engines));
>>> err_free:
>>> kfree(ctx);
>>> return NULL;
>>>diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h
>>>b/drivers/gpu/drm/i915/gt/intel_context_types.h
>>>index bf9cedfccbf0..6959b05ae5f8 100644
>>>--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>>>+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>>>@@ -58,6 +58,7 @@ struct intel_context {
>>> u32 *lrc_reg_state;
>>> u64 lrc_desc;
>>>+ u32 tag; /* cookie passed to HW to track this context on
>>>submission */
>>> unsigned int active_count; /* protected by timeline->mutex */
>>>diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>index da4b45aa84b1..7bce176e696c 100644
>>>--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>@@ -296,10 +296,12 @@ struct intel_engine_cs {
>>> u8 uabi_class;
>>> u8 uabi_instance;
>>>+ u32 uabi_capabilities;
>>> u32 context_size;
>>> u32 mmio_base;
>>>- u32 uabi_capabilities;
>>>+ unsigned int context_tag;
>>>+#define NUM_CONTEXT_TAG (256)
>>
>>AFAICT this define now limits how deep the ELSP can be before we
>>start getting hard-to-debug problems. The GuC angle needs considering
>>here.
>>
>>Can we add some asserts somewhere to express the relationship?
>>
>>And a comment here explaining this, or with the assert.
>>
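
A sketch of the sort of assert being asked for above -- hypothetical, not part
of the posted patch; the exact bound is a guess and execlists_num_ports() is
assumed to be the existing helper:

/*
 * The tag space must be at least as large as the number of contexts
 * that can be in flight on the engine at once, otherwise two
 * in-flight contexts could alias the same SW context id.
 */
static void assert_context_tag_space(struct intel_engine_cs *engine)
{
	/* tag must fit in the gen11+ SW context id field (bits 37-47) */
	BUILD_BUG_ON(NUM_CONTEXT_TAG > BIT(GEN11_SW_CTX_ID_WIDTH));

	/* every ELSP slot needs a distinct tag */
	GEM_BUG_ON(execlists_num_ports(&engine->execlists) > NUM_CONTEXT_TAG);
}
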
>>> struct rb_node uabi_node;
>>>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>index 0ea2f4d7dc43..af79b8d70800 100644
>>>--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>@@ -433,12 +433,8 @@ assert_priority_queue(const struct
>>>i915_request *prev,
>>> static u64
>>> lrc_descriptor(struct intel_context *ce, struct intel_engine_cs
>>>*engine)
>>> {
>>>- struct i915_gem_context *ctx = ce->gem_context;
>>> u64 desc;
>>>- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
>>>- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID >
>>>(BIT(GEN11_SW_CTX_ID_WIDTH)));
>>>-
>>> desc = INTEL_LEGACY_32B_CONTEXT;
>>> if (i915_vm_is_4lvl(ce->vm))
>>> desc = INTEL_LEGACY_64B_CONTEXT;
>>>@@ -456,20 +452,11 @@ lrc_descriptor(struct intel_context *ce,
>>>struct intel_engine_cs *engine)
>>> * anything below.
>>> */
>>> if (INTEL_GEN(engine->i915) >= 11) {
>>>- GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
>>>- desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
>>>- /* bits 37-47 */
>>>-
>>> desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
>>> /* bits 48-53 */
>>>- /* TODO: decide what to do with SW counter (bits 55-60) */
>>>-
>>> desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
>>> /* bits 61-63 */
>>>- } else {
>>>- GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
>>>- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits
>>>32-52 */
>>> }
>>> return desc;
>>>@@ -978,6 +965,15 @@ __execlists_schedule_in(struct i915_request *rq)
>>> intel_context_get(ce);
>>>+ if (ce->tag) {
>>>+ ce->lrc_desc |= (u64)ce->tag << 32;
>>>+ } else {
>>>+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
>>>+ ce->lrc_desc |=
>>>+ (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
>>>+ GEN11_SW_CTX_ID_SHIFT;
>>>+ }
>>
>>So the hw id is valid only while the context is in the ELSP, and it
>>changes with every submission except in the OA case?
>>
>>Shifts and masks look dodgy. For >= gen11 the current code has the
>>hw_id in [37, 42], otherwise [32, 52]. This patch does not seem to
>>handle the differences between gens.
>>
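
For illustration, the kind of gen-aware placement being hinted at might look
roughly like this -- a guess at intent, not the posted code:

/*
 * Assumed sketch: choose the context id field by gen before folding
 * the per-submission tag into the descriptor.
 */
static u64 ce_tag_to_desc(const struct intel_engine_cs *engine, u32 tag)
{
	if (INTEL_GEN(engine->i915) >= 11)
		return (u64)tag << GEN11_SW_CTX_ID_SHIFT;	/* bits 37-47 */

	return (u64)tag << GEN8_CTX_ID_SHIFT;			/* bits 32-52 */
}
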
>>>+
>>> intel_gt_pm_get(engine->gt);
>>> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
>>> intel_engine_context_in(engine);
>>>@@ -2224,7 +2220,6 @@ static void execlists_context_unpin(struct
>>>intel_context *ce)
>>> check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
>>> ce->engine);
>>>- i915_gem_context_unpin_hw_id(ce->gem_context);
>>> i915_gem_object_unpin_map(ce->state->obj);
>>> intel_ring_reset(ce->ring, ce->ring->tail);
>>> }
>>>@@ -2274,18 +2269,12 @@ __execlists_context_pin(struct intel_context *ce,
>>> goto unpin_active;
>>> }
>>>- ret = i915_gem_context_pin_hw_id(ce->gem_context);
>>>- if (ret)
>>>- goto unpin_map;
>>>-
>>> ce->lrc_desc = lrc_descriptor(ce, engine);
>>> ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>>> __execlists_update_reg_state(ce, engine);
>>> return 0;
>>>-unpin_map:
>>>- i915_gem_object_unpin_map(ce->state->obj);
>>> unpin_active:
>>> intel_context_active_release(ce);
>>> err:
>>>diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>index 343d79c1cb7e..04a5a0d90823 100644
>>>--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>@@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct
>>>device_attribute *attr,
>>> return sprintf(buf, "\n");
>>> }
>>>-static ssize_t
>>>-hw_id_show(struct device *dev, struct device_attribute *attr,
>>>- char *buf)
>>>-{
>>>- struct mdev_device *mdev = mdev_from_dev(dev);
>>>-
>>>- if (mdev) {
>>>- struct intel_vgpu *vgpu = (struct intel_vgpu *)
>>>- mdev_get_drvdata(mdev);
>>>- return sprintf(buf, "%u\n",
>>>- vgpu->submission.shadow[0]->gem_context->hw_id);
>>>- }
>>>- return sprintf(buf, "\n");
>>>-}
>>>-
>>> static DEVICE_ATTR_RO(vgpu_id);
>>>-static DEVICE_ATTR_RO(hw_id);
>>> static struct attribute *intel_vgpu_attrs[] = {
>>> &dev_attr_vgpu_id.attr,
>>>- &dev_attr_hw_id.attr,
>>> NULL
>>> };
>>>diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>>b/drivers/gpu/drm/i915/i915_debugfs.c
>>>index fd41505d52ec..8caaa446490f 100644
>>>--- a/drivers/gpu/drm/i915/i915_debugfs.c
>>>+++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>>@@ -1504,9 +1504,6 @@ static int i915_context_status(struct
>>>seq_file *m, void *unused)
>>> struct intel_context *ce;
>>> seq_puts(m, "HW context ");
>>>- if (!list_empty(&ctx->hw_id_link))
>>>- seq_printf(m, "%x [pin %u]", ctx->hw_id,
>>>- atomic_read(&ctx->hw_id_pin_count));
>>> if (ctx->pid) {
>>> struct task_struct *task;
>>>diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
>>>b/drivers/gpu/drm/i915/i915_gpu_error.c
>>>index a86d28bda6dd..47239df653f2 100644
>>>--- a/drivers/gpu/drm/i915/i915_gpu_error.c
>>>+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>>>@@ -471,9 +471,9 @@ static void error_print_context(struct
>>>drm_i915_error_state_buf *m,
>>> const char *header,
>>> const struct drm_i915_error_context *ctx)
>>> {
>>>- err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
>>>- header, ctx->comm, ctx->pid, ctx->hw_id,
>>>- ctx->sched_attr.priority, ctx->guilty, ctx->active);
>>>+ err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
>>>+ header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
>>>+ ctx->guilty, ctx->active);
>>> }
>>> static void error_print_engine(struct drm_i915_error_state_buf *m,
>>>@@ -1262,7 +1262,6 @@ static bool record_context(struct
>>>drm_i915_error_context *e,
>>> rcu_read_unlock();
>>> }
>>>- e->hw_id = ctx->hw_id;
>>> e->sched_attr = ctx->sched;
>>> e->guilty = atomic_read(&ctx->guilty_count);
>>> e->active = atomic_read(&ctx->active_count);
>>>diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h
>>>b/drivers/gpu/drm/i915/i915_gpu_error.h
>>>index caaed5093d95..4dc36d6ee3a2 100644
>>>--- a/drivers/gpu/drm/i915/i915_gpu_error.h
>>>+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
>>>@@ -117,7 +117,6 @@ struct i915_gpu_state {
>>> struct drm_i915_error_context {
>>> char comm[TASK_COMM_LEN];
>>> pid_t pid;
>>>- u32 hw_id;
>>> int active;
>>> int guilty;
>>> struct i915_sched_attr sched_attr;
>>>diff --git a/drivers/gpu/drm/i915/i915_perf.c
>>>b/drivers/gpu/drm/i915/i915_perf.c
>>>index 80055501eccb..d36ba248d438 100644
>>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>>@@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct
>>>i915_perf_stream *stream)
>>> } else {
>>> stream->specific_ctx_id_mask =
>>> (1U << GEN8_CTX_ID_WIDTH) - 1;
>>>- stream->specific_ctx_id =
>>>- upper_32_bits(ce->lrc_desc);
>>>- stream->specific_ctx_id &=
>>>- stream->specific_ctx_id_mask;
>>>+ stream->specific_ctx_id = stream->specific_ctx_id_mask;
>>> }
>>> break;
>>> case 11:
>>> case 12: {
>>> stream->specific_ctx_id_mask =
>>>- ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>>(GEN11_SW_CTX_ID_SHIFT - 32) |
>>>- ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) <<
>>>(GEN11_ENGINE_INSTANCE_SHIFT - 32) |
>>>- ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) <<
>>>(GEN11_ENGINE_CLASS_SHIFT - 32);
>>>- stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
>>>- stream->specific_ctx_id &=
>>>- stream->specific_ctx_id_mask;
>>>+ ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>>(GEN11_SW_CTX_ID_SHIFT - 32);
>>>+ stream->specific_ctx_id = stream->specific_ctx_id_mask;
>>> break;
>>> }
>>>@@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct
>>>i915_perf_stream *stream)
>>> MISSING_CASE(INTEL_GEN(i915));
>>> }
>>>+ ce->tag = stream->specific_ctx_id_mask;
>>>+
>>> DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
>>> stream->specific_ctx_id,
>>> stream->specific_ctx_id_mask);
>>>@@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct
>>>i915_perf_stream *stream)
>>> {
>>> struct intel_context *ce;
>>>- stream->specific_ctx_id = INVALID_CTX_ID;
>>>- stream->specific_ctx_id_mask = 0;
>>>-
>>> ce = fetch_and_zero(&stream->pinned_ctx);
>>>- if (ce)
>>>+ if (ce) {
>>>+ ce->tag = 0;
>>> intel_context_unpin(ce);
>>>+ }
>>>+
>>>+ stream->specific_ctx_id = INVALID_CTX_ID;
>>>+ stream->specific_ctx_id_mask = 0;
>>
>>The OA hunks look dodgy as well. ce->tag is set to
>>stream->specific_ctx_id_mask while I think it should be the id.
>>Furthermore, the id is assigned from the mask, which is fixed and not
>>unique across contexts.
>>
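
If that reading is right, the assignment presumably wants to be the id rather
than the mask, something along the lines of (hypothetical correction, not the
posted hunk):

	/* hand OA the per-context id, not the field mask */
	ce->tag = stream->specific_ctx_id;
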
>>> }
>>> static void
>>>diff --git a/drivers/gpu/drm/i915/i915_trace.h
>>>b/drivers/gpu/drm/i915/i915_trace.h
>>>index 24f2944da09d..1f2cf6cfafb5 100644
>>>--- a/drivers/gpu/drm/i915/i915_trace.h
>>>+++ b/drivers/gpu/drm/i915/i915_trace.h
>>>@@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>>- __field(u32, hw_id)
>>> __field(u64, ctx)
>>> __field(u16, class)
>>> __field(u16, instance)
>>>@@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
>>> TP_fast_assign(
>>> __entry->dev = rq->i915->drm.primary->index;
>>>- __entry->hw_id = rq->gem_context->hw_id;
>>> __entry->class = rq->engine->uabi_class;
>>> __entry->instance = rq->engine->uabi_instance;
>>> __entry->ctx = rq->fence.context;
>>>@@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
>>> __entry->flags = flags;
>>> ),
>>>- TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu,
>>>seqno=%u, flags=0x%x",
>>>+ TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u,
>>>flags=0x%x",
>>> __entry->dev, __entry->class, __entry->instance,
>>>- __entry->hw_id, __entry->ctx, __entry->seqno,
>>>- __entry->flags)
>>>+ __entry->ctx, __entry->seqno, __entry->flags)
>>> );
>>> DECLARE_EVENT_CLASS(i915_request,
>>>@@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>>- __field(u32, hw_id)
>>> __field(u64, ctx)
>>> __field(u16, class)
>>> __field(u16, instance)
>>>@@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
>>> TP_fast_assign(
>>> __entry->dev = rq->i915->drm.primary->index;
>>>- __entry->hw_id = rq->gem_context->hw_id;
>>> __entry->class = rq->engine->uabi_class;
>>> __entry->instance = rq->engine->uabi_instance;
>>> __entry->ctx = rq->fence.context;
>>> __entry->seqno = rq->fence.seqno;
>>> ),
>>>- TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
>>>+ TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
>>> __entry->dev, __entry->class, __entry->instance,
>>>- __entry->hw_id, __entry->ctx, __entry->seqno)
>>>+ __entry->ctx, __entry->seqno)
>>> );
>>> DEFINE_EVENT(i915_request, i915_request_add,
>>>@@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>>- __field(u32, hw_id)
>>> __field(u64, ctx)
>>> __field(u16, class)
>>> __field(u16, instance)
>>>@@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
>>> TP_fast_assign(
>>> __entry->dev = rq->i915->drm.primary->index;
>>>- __entry->hw_id = rq->gem_context->hw_id;
>>> __entry->class = rq->engine->uabi_class;
>>> __entry->instance = rq->engine->uabi_instance;
>>> __entry->ctx = rq->fence.context;
>>>@@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
>>> __entry->port = port;
>>> ),
>>>- TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu,
>>>seqno=%u, prio=%u, port=%u",
>>>+ TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u,
>>>prio=%u, port=%u",
>>> __entry->dev, __entry->class, __entry->instance,
>>>- __entry->hw_id, __entry->ctx, __entry->seqno,
>>>+ __entry->ctx, __entry->seqno,
>>> __entry->prio, __entry->port)
>>> );
>>>@@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>>- __field(u32, hw_id)
>>> __field(u64, ctx)
>>> __field(u16, class)
>>> __field(u16, instance)
>>>@@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
>>> TP_fast_assign(
>>> __entry->dev = rq->i915->drm.primary->index;
>>>- __entry->hw_id = rq->gem_context->hw_id;
>>> __entry->class = rq->engine->uabi_class;
>>> __entry->instance = rq->engine->uabi_instance;
>>> __entry->ctx = rq->fence.context;
>>>@@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
>>> __entry->completed = i915_request_completed(rq);
>>> ),
>>>- TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu,
>>>seqno=%u, completed?=%u",
>>>+ TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u,
>>>completed?=%u",
>>> __entry->dev, __entry->class, __entry->instance,
>>>- __entry->hw_id, __entry->ctx, __entry->seqno,
>>>- __entry->completed)
>>>+ __entry->ctx, __entry->seqno, __entry->completed)
>>> );
>>> #else
>>>@@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>>- __field(u32, hw_id)
>>> __field(u64, ctx)
>>> __field(u16, class)
>>> __field(u16, instance)
>>>@@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
>>> */
>>> TP_fast_assign(
>>> __entry->dev = rq->i915->drm.primary->index;
>>>- __entry->hw_id = rq->gem_context->hw_id;
>>> __entry->class = rq->engine->uabi_class;
>>> __entry->instance = rq->engine->uabi_instance;
>>> __entry->ctx = rq->fence.context;
>>>@@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
>>> __entry->flags = flags;
>>> ),
>>>- TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu,
>>>seqno=%u, flags=0x%x",
>>>+ TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u,
>>>flags=0x%x",
>>> __entry->dev, __entry->class, __entry->instance,
>>>- __entry->hw_id, __entry->ctx, __entry->seqno,
>>>+ __entry->ctx, __entry->seqno,
>>> __entry->flags)
>>> );
>>>@@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
>>> TP_STRUCT__entry(
>>> __field(u32, dev)
>>> __field(struct i915_gem_context *, ctx)
>>>- __field(u32, hw_id)
>>> __field(struct i915_address_space *, vm)
>>> ),
>>> TP_fast_assign(
>>> __entry->dev = ctx->i915->drm.primary->index;
>>> __entry->ctx = ctx;
>>>- __entry->hw_id = ctx->hw_id;
>>> __entry->vm = ctx->vm;
>>> ),
>>>- TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
>>>- __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
>>>+ TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
>>>+ __entry->dev, __entry->ctx, __entry->vm)
>>> )
>>> DEFINE_EVENT(i915_context, i915_context_create,
>>>diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
>>>b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
>>>index 52d2df843148..f39f0282e78c 100644
>>>--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
>>>+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
>>>@@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
>>> if (IS_ERR(rq)) {
>>> /* When full, fail_if_busy will trigger EBUSY */
>>> if (PTR_ERR(rq) != -EBUSY) {
>>>- pr_err("Unexpected error from request alloc
>>>(ctx hw id %u, on %s): %d\n",
>>>- ctx->hw_id, engine->name,
>>>+ pr_err("Unexpected error from request alloc
>>>(on %s): %d\n",
>>>+ engine->name,
>>> (int)PTR_ERR(rq));
>>> err = PTR_ERR(rq);
>>> }
>>>diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c
>>>b/drivers/gpu/drm/i915/selftests/i915_vma.c
>>>index 1c9db08f7c28..ac1ff558eb90 100644
>>>--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
>>>+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
>>>@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
>>> }
>>> nc = 0;
>>>- for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
>>>+ for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
>>> for (; nc < num_ctx; nc++) {
>>> ctx = mock_context(i915, "mock");
>>> if (!ctx)
>>>
>>
>>Unless I am missing something, I see this patch as a simplification
>>and not a fix. If we can get away with it in the GuC world then it's
>>quite a big simplification, so it's fine by me.
>
>The GuC will require a fixed ID per intel_context for the whole time
>the context is in use/pinned. The GuC IDs are a global list (i.e. all
>engines share it) with 64k entries, so the logic to assign them is
>going to be a bit different and most likely confined to the GuC
>context_pin/unpin callbacks, but we might bring back some of the
>low-level ID management logic that has been removed in this patch.
>
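
A rough sketch of what that pin/unpin-scoped assignment could look like --
purely illustrative, with guc_context_ida and the callback names as
placeholders rather than anything from an actual patch:

#include <linux/idr.h>
#include <linux/sizes.h>

#define GUC_MAX_CONTEXT_ID	(SZ_64K - 1)	/* 64k global ids, shared by all engines */

static DEFINE_IDA(guc_context_ida);

static int guc_context_pin(struct intel_context *ce)
{
	int id;

	/* id stays fixed for as long as the context is pinned */
	id = ida_alloc_max(&guc_context_ida, GUC_MAX_CONTEXT_ID, GFP_KERNEL);
	if (id < 0)
		return id;

	ce->tag = id;
	return 0;
}

static void guc_context_unpin(struct intel_context *ce)
{
	ida_free(&guc_context_ida, ce->tag);
	ce->tag = 0;
}
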
>An ID at the gem_context level definitely doesn't help on the guc side, so:
>
>Acked-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
>Daniele
>
>>
>>Regards,
>>
>>Tvrtko