[Intel-gfx] [PATCH 39/39] drm/i915: Remove logical HW ID

Wed Mar 13 14:44:01 UTC 2019

We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back for various status identifies. The only
status we need to track is for OA, so when using perf, we assign the
specific context a unique tag.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 150 ------------------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |  10 --
 drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
 drivers/gpu/drm/i915/i915_drv.h               |  11 --
 drivers/gpu/drm/i915/i915_gpu_error.c         |  10 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
 drivers/gpu/drm/i915/i915_perf.c              |  58 ++-----
 drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
 drivers/gpu/drm/i915/intel_context_types.h    |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h     |   3 +
 drivers/gpu/drm/i915/intel_lrc.c              |  29 ++--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
 16 files changed, 53 insertions(+), 313 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e763e1142ce9..7d528405fec7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -127,95 +127,6 @@ static void lut_close(struct i915_gem_context *ctx)
 	rcu_read_unlock();
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-	unsigned int max;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
-		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
-		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
-			continue;
-		}
-
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
-
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-	int ret;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
-	*out = ret;
-	return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-
-	if (list_empty(&ctx->hw_id_link))
-		return;
-
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
-}
-
 static void free_engines(struct intel_engine_cs **engines, int count)
 {
 	int i;
@@ -238,7 +149,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
-	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 	free_engines(ctx->engines, ctx->nengine);
 
@@ -309,12 +219,6 @@ static void context_close(struct i915_gem_context *ctx)
 
 	i915_gem_context_set_closed(ctx);
 
-	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
-	 */
-	release_hw_id(ctx);
-
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
 	 * so we need to clear the LUT before we close all the VMA (inside
@@ -377,7 +281,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -569,18 +472,11 @@ struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
-	int err;
 
 	ctx = i915_gem_create_context(i915, NULL, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
 	i915_gem_context_clear_bannable(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
@@ -595,12 +491,6 @@ static void init_contexts(struct drm_i915_private *i915)
 	mutex_init(&i915->contexts.mutex);
 	INIT_LIST_HEAD(&i915->contexts.list);
 
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
-
 	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
 	init_llist_head(&i915->contexts.free_list);
 }
@@ -627,15 +517,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
 	dev_priv->kernel_context = ctx;
 
 	/* highest priority; preempting task */
@@ -660,10 +541,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 	if (i915->preempt_context)
 		destroy_kernel_context(&i915->preempt_context);
 	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
@@ -2210,33 +2087,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_context.c"
 #include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 0e813faca451..4bb0af632f8b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -93,21 +93,6 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 333eeacef5e3..7cfc72e8202f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -114,24 +114,6 @@ struct i915_gem_context {
 
 	unsigned int nengine;
 
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
 	struct list_head active_engines;
 	struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 9448d1b434a6..4a6c306ffb87 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -535,9 +535,9 @@ static int igt_ctx_exec(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->ppgtt), err);
 				goto out_unlock;
 			}
@@ -658,9 +658,9 @@ static int igt_shared_ctx_exec(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->ppgtt), err);
 				goto out_test;
 			}
@@ -1239,10 +1239,9 @@ static int igt_ctx_readonly(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->ppgtt), err);
+				       engine->name, yesno(!!ctx->ppgtt), err);
 				goto out_unlock;
 			}
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index c9d9969821e0..2549944eb99d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -12,7 +12,6 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -26,14 +25,9 @@ mock_context(struct drm_i915_private *i915,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_handles;
-
 	if (name) {
 		struct i915_hw_ppgtt *ppgtt;
 
@@ -50,10 +44,6 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_handles:
-	kfree(ctx);
-	return NULL;
-
 err_put:
 	i915_gem_context_set_closed(ctx);
 	i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 08683dca7775..e87b3ac45a7f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1888,9 +1888,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 633d3c0b6e96..082d686df544 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1732,16 +1732,6 @@ struct drm_i915_private {
 		struct list_head list;
 		struct llist_head free_list;
 		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-		struct list_head hw_id_list;
 	} contexts;
 
 	u32 fdi_rx_config;
@@ -1864,7 +1854,6 @@ struct drm_i915_private {
 			 */
 			struct i915_perf_stream *exclusive_stream;
 
-			struct intel_context *pinned_ctx;
 			u32 specific_ctx_id;
 			u32 specific_ctx_id_mask;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fa9827eed8ef..4957187f7aa8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -457,8 +457,8 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
+	err_printf(m, "%s%s[%d] user_handle %d, prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->handle,
 		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
 }
 
@@ -761,11 +761,10 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		if (obj) {
 			err_puts(m, m->i915->engine[i]->name);
 			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d], ctx %d [%d])",
+				err_printf(m, " (submitted by %s [%d/%d])",
 					   ee->context.comm,
 					   ee->context.pid,
-					   ee->context.handle,
-					   ee->context.hw_id);
+					   ee->context.handle);
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
@@ -1330,7 +1329,6 @@ static void record_context(struct drm_i915_error_context *e,
 	}
 
 	e->handle = ctx->user_handle;
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 99d6b7b270c2..6fe17edcb132 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -117,7 +117,6 @@ struct i915_gpu_state {
 			char comm[TASK_COMM_LEN];
 			pid_t pid;
 			u32 handle;
-			u32 hw_id;
 			int active;
 			int guilty;
 			struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index fc7fa4238046..0b47b7bdd04d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1202,33 +1202,6 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
 }
 
-static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
-					    struct i915_gem_context *ctx)
-{
-	struct intel_engine_cs *engine = i915->engine[RCS0];
-	struct intel_context *ce;
-	int ret;
-
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * As the ID is the gtt offset of the context's vma we
-	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
-	 */
-	ce = intel_context_pin(ctx, engine);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
-
-	i915->perf.oa.pinned_ctx = ce;
-
-	return ce;
-}
-
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1244,7 +1217,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	ce = oa_pin_context(i915, stream->ctx);
+	ce = intel_context_pin_lock(stream->ctx, i915->engine[RCS0]);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
@@ -1286,19 +1259,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			i915->perf.oa.specific_ctx_id_mask =
 				(1U << GEN8_CTX_ID_WIDTH) - 1;
 			i915->perf.oa.specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			i915->perf.oa.specific_ctx_id &=
 				i915->perf.oa.specific_ctx_id_mask;
 		}
 		break;
 
 	case 11: {
 		i915->perf.oa.specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		i915->perf.oa.specific_ctx_id &=
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		i915->perf.oa.specific_ctx_id =
 			i915->perf.oa.specific_ctx_id_mask;
 		break;
 	}
@@ -1307,6 +1275,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		MISSING_CASE(INTEL_GEN(i915));
 	}
 
+	ce->hw_tag = i915->perf.oa.specific_ctx_id_mask;
+	intel_context_pin_unlock(ce);
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 i915->perf.oa.specific_ctx_id,
 			 i915->perf.oa.specific_ctx_id_mask);
@@ -1323,18 +1294,17 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-	dev_priv->perf.oa.specific_ctx_id_mask = 0;
-
-	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
-	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+	ce = intel_context_pin_lock(stream->ctx, i915->engine[RCS0]);
+	if (!IS_ERR(ce)) {
+		ce->hw_tag = 0;
+		intel_context_pin_unlock(ce);
 	}
+
+	i915->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+	i915->perf.oa.specific_ctx_id_mask = 0;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 12893304c8f8..22b64fdb9396 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -673,7 +673,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -683,7 +682,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -691,10 +689,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -703,7 +700,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -712,16 +708,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -746,7 +741,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -757,7 +751,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -766,9 +759,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -778,7 +771,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -788,7 +780,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -796,10 +787,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -837,7 +827,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -853,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -861,9 +849,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      !!(__entry->flags & I915_WAIT_LOCKED),
 		      __entry->flags)
 );
@@ -967,19 +955,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
 			__entry->vm = ctx->ppgtt ? &ctx->ppgtt->vm : NULL;
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 5c3c33d05a6c..6744ffa598f7 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 hw_tag;
 
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 95a61b438cde..e18503957d29 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -221,6 +221,9 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	unsigned int hw_tag;
+#define NUM_HW_TAG (256)
+
 	struct intel_ring *buffer;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b1f03d335e74..6bc7861cab72 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -400,9 +400,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = ctx->desc_template;				/* bits  0-11 */
 	GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
 
@@ -416,20 +413,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
@@ -553,6 +541,16 @@ execlists_schedule_in(struct i915_request *rq, int idx)
 	if (!count) {
 		intel_context_get(ce);
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		if (ce->hw_tag) {
+			ce->lrc_desc |= (u64)ce->hw_tag << 32;
+		} else {
+			ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+			ce->lrc_desc |=
+				(u64)(rq->engine->hw_tag++ % NUM_HW_TAG) <<
+				GEN11_SW_CTX_ID_SHIFT;
+		}
+
 		intel_engine_context_in(rq->engine);
 		ce->inflight = rq->engine;
 	}
@@ -1434,7 +1432,6 @@ static void execlists_context_destroy(struct kref *kref)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_unpin(ce->ring);
 }
@@ -1492,18 +1489,12 @@ __execlists_context_pin(struct intel_context *ce,
 	if (ret)
 		goto unpin_map;
 
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_ring;
-
 	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
 
-unpin_ring:
-	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
 unpin_active:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 753ae534df13..f68bc16d80d7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -467,8 +467,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index b0a4296a0504..2965d405722e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, NUM_HW_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
-- 
2.20.1