[Intel-gfx] [PATCH 20/27] drm/i915: Remove logical HW ID

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Sep 25 12:41:10 UTC 2019


[+ Daniele, I think he might want to have a look at this.]

On 25/09/2019 11:01, Chris Wilson wrote:
> With the introduction of ctx->engines[] we allow multiple logical
> contexts to be used on the same engine (e.g. with virtual engines). Each
> logical context requires a unique tag in order for context-switching to
> occur correctly between them.
> 
> We only need to keep a unique tag for the active lifetime of the
> context, and for as long as we need to identify that context. The HW
> uses the tag to determine if it should use a lite-restore (why not the
> LRCA?) and passes the tag back for various status identifiers. The only
> status we need to track is for OA, so when using perf, we assign the
> specific context a unique tag.
> 
> Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")

Fixes? Why?

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 147 ------------------
>   drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
>   .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
>   .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
>   drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  29 ++--
>   drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 --
>   drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
>   drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
>   drivers/gpu/drm/i915/i915_perf.c              |  25 ++-
>   drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
>   .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
>   16 files changed, 48 insertions(+), 284 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index e832238a72e5..4e59b809d901 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
>   	return i915_gem_context_get_engine(ctx, idx);
>   }
>   
> -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> -{
> -	unsigned int max;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	if (INTEL_GEN(i915) >= 12)
> -		max = GEN12_MAX_CONTEXT_HW_ID;
> -	else if (INTEL_GEN(i915) >= 11)
> -		max = GEN11_MAX_CONTEXT_HW_ID;
> -	else if (USES_GUC_SUBMISSION(i915))
> -		/*
> -		 * When using GuC in proxy submission, GuC consumes the
> -		 * highest bit in the context id to indicate proxy submission.
> -		 */
> -		max = MAX_GUC_CONTEXT_HW_ID;
> -	else
> -		max = MAX_CONTEXT_HW_ID;
> -
> -	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
> -}
> -
> -static int steal_hw_id(struct drm_i915_private *i915)
> -{
> -	struct i915_gem_context *ctx, *cn;
> -	LIST_HEAD(pinned);
> -	int id = -ENOSPC;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	list_for_each_entry_safe(ctx, cn,
> -				 &i915->contexts.hw_id_list, hw_id_link) {
> -		if (atomic_read(&ctx->hw_id_pin_count)) {
> -			list_move_tail(&ctx->hw_id_link, &pinned);
> -			continue;
> -		}
> -
> -		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
> -		list_del_init(&ctx->hw_id_link);
> -		id = ctx->hw_id;
> -		break;
> -	}
> -
> -	/*
> -	 * Remember how far we got up on the last repossesion scan, so the
> -	 * list is kept in a "least recently scanned" order.
> -	 */
> -	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
> -	return id;
> -}
> -
> -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
> -{
> -	int ret;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	/*
> -	 * We prefer to steal/stall ourselves and our users over that of the
> -	 * entire system. That may be a little unfair to our users, and
> -	 * even hurt high priority clients. The choice is whether to oomkill
> -	 * something else, or steal a context id.
> -	 */
> -	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> -	if (unlikely(ret < 0)) {
> -		ret = steal_hw_id(i915);
> -		if (ret < 0) /* once again for the correct errno code */
> -			ret = new_hw_id(i915, GFP_KERNEL);
> -		if (ret < 0)
> -			return ret;
> -	}
> -
> -	*out = ret;
> -	return 0;
> -}
> -
> -static void release_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -
> -	if (list_empty(&ctx->hw_id_link))
> -		return;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -	if (!list_empty(&ctx->hw_id_link)) {
> -		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
> -		list_del_init(&ctx->hw_id_link);
> -	}
> -	mutex_unlock(&i915->contexts.mutex);
> -}
> -
>   static void __free_engines(struct i915_gem_engines *e, unsigned int count)
>   {
>   	while (count--) {
> @@ -313,8 +222,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> -	release_hw_id(ctx);
> -
>   	free_engines(rcu_access_pointer(ctx->engines));
>   	mutex_destroy(&ctx->engines_mutex);
>   
> @@ -459,12 +366,6 @@ static void context_close(struct i915_gem_context *ctx)
>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   
> -	/*
> -	 * This context will never again be assinged to HW, so we can
> -	 * reuse its ID for the next context.
> -	 */
> -	release_hw_id(ctx);
> -
>   	/*
>   	 * The LUT uses the VMA as a backpointer to unref the object,
>   	 * so we need to clear the LUT before we close all the VMA (inside
> @@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   
>   	/* NB: Mark all slices as needing a remap so that when the context first
>   	 * loads it will restore whatever remap state already exists. If there
> @@ -676,18 +576,11 @@ struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   {
>   	struct i915_gem_context *ctx;
> -	int err;
>   
>   	ctx = i915_gem_create_context(i915, 0);
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> -	err = i915_gem_context_pin_hw_id(ctx);
> -	if (err) {
> -		destroy_kernel_context(&ctx);
> -		return ERR_PTR(err);
> -	}
> -
>   	i915_gem_context_clear_bannable(ctx);
>   	i915_gem_context_set_persistence(ctx);
>   	ctx->sched.priority = I915_USER_PRIORITY(prio);
> @@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>   		DRM_ERROR("Failed to create default global context\n");
>   		return PTR_ERR(ctx);
>   	}
> -	/*
> -	 * For easy recognisablity, we want the kernel context to be 0 and then
> -	 * all user contexts will have non-zero hw_id. Kernel contexts are
> -	 * permanently pinned, so that we never suffer a stall and can
> -	 * use them from any allocation context (e.g. for evicting other
> -	 * contexts and from inside the shrinker).
> -	 */
> -	GEM_BUG_ON(ctx->hw_id);
> -	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
>   	dev_priv->kernel_context = ctx;
>   
>   	DRM_DEBUG_DRIVER("%s context support initialized\n",
> @@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
>   	destroy_kernel_context(&i915->kernel_context);
> -
> -	/* Must free all deferred contexts (via flush_workqueue) first */
> -	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
> -	ida_destroy(&i915->contexts.hw_ida);
>   }
>   
>   static int context_idr_cleanup(int id, void *p, void *data)
> @@ -2438,33 +2318,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
>   	return ret;
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -	int err = 0;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -
> -	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
> -
> -	if (list_empty(&ctx->hw_id_link)) {
> -		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
> -
> -		err = assign_hw_id(i915, &ctx->hw_id);
> -		if (err)
> -			goto out_unlock;
> -
> -		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
> -	}
> -
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
> -	atomic_inc(&ctx->hw_id_pin_count);
> -
> -out_unlock:
> -	mutex_unlock(&i915->contexts.mutex);
> -	return err;
> -}
> -
>   /* GEM context-engines iterator: for_each_gem_engine() */
>   struct intel_context *
>   i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index e0f5b6c6a331..1b0df53436cf 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
>   	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
> -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
> -		return 0;
> -
> -	return __i915_gem_context_pin_hw_id(ctx);
> -}
> -
> -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
> -{
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
> -	atomic_dec(&ctx->hw_id_pin_count);
> -}
> -
>   static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>   {
>   	return !ctx->file_priv;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index daf1ea5075a6..6419da7c9f90 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -148,24 +148,6 @@ struct i915_gem_context {
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   #define CONTEXT_USER_ENGINES		3
>   
> -	/**
> -	 * @hw_id: - unique identifier for the context
> -	 *
> -	 * The hardware needs to uniquely identify the context for a few
> -	 * functions like fault reporting, PASID, scheduling. The
> -	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
> -	 * id for the lifetime of the context.
> -	 *
> -	 * @hw_id_pin_count: - number of times this context had been pinned
> -	 * for use (should be, at most, once per engine).
> -	 *
> -	 * @hw_id_link: - all contexts with an assigned id are tracked
> -	 * for possible repossession.
> -	 */
> -	unsigned int hw_id;
> -	atomic_t hw_id_pin_count;
> -	struct list_head hw_id_link;
> -
>   	struct mutex mutex;
>   
>   	struct i915_sched_attr sched;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index b39224ed3817..a8a9293f4ac6 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -446,9 +446,9 @@ static int igt_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -584,9 +584,9 @@ static int igt_shared_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -1168,10 +1168,9 @@ static int igt_ctx_readonly(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       ce->engine->name, ctx->hw_id,
> -				       yesno(!!ctx->vm), err);
> +				       ce->engine->name, yesno(!!ctx->vm), err);
>   				i915_gem_context_unlock_engines(ctx);
>   				goto out_unlock;
>   			}
> diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> index 0d1a275ec316..ebc46f098561 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
>   {
>   	struct i915_gem_context *ctx;
>   	struct i915_gem_engines *e;
> -	int ret;
>   
>   	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>   	if (!ctx)
> @@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   	mutex_init(&ctx->mutex);
>   
> -	ret = i915_gem_context_pin_hw_id(ctx);
> -	if (ret < 0)
> -		goto err_engines;
> -
>   	if (name) {
>   		struct i915_ppgtt *ppgtt;
>   
> @@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
>   
>   	return ctx;
>   
> -err_engines:
> -	free_engines(rcu_access_pointer(ctx->engines));
>   err_free:
>   	kfree(ctx);
>   	return NULL;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index bf9cedfccbf0..6959b05ae5f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -58,6 +58,7 @@ struct intel_context {
>   
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> +	u32 tag; /* cookie passed to HW to track this context on submission */
>   
>   	unsigned int active_count; /* protected by timeline->mutex */
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index da4b45aa84b1..7bce176e696c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -296,10 +296,12 @@ struct intel_engine_cs {
>   	u8 uabi_class;
>   	u8 uabi_instance;
>   
> +	u32 uabi_capabilities;
>   	u32 context_size;
>   	u32 mmio_base;
>   
> -	u32 uabi_capabilities;
> +	unsigned int context_tag;
> +#define NUM_CONTEXT_TAG (256)

AFAICT this define now determines how deep the ELSP can go before we 
start hitting hard-to-debug problems. The GuC angle needs considering 
here as well.

Can we add some asserts somewhere to express the relationship?

And a comment here explaining this, or with the assert.
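
Something along these lines is what I have in mind - just a sketch, and 
both the placement and the exact constants (EXECLIST_MAX_PORTS in 
particular) are assumptions on my part rather than anything this patch 
defines:

	/* e.g. in __execlists_schedule_in(), next to the tag assignment */

	/* Every tag we hand out must fit in the SW context ID field. */
	BUILD_BUG_ON(NUM_CONTEXT_TAG > BIT(GEN11_SW_CTX_ID_WIDTH));

	/*
	 * The rolling allocator must not be able to wrap around onto a
	 * tag which can still be resident in an ELSP port, otherwise two
	 * inflight contexts could alias.
	 */
	BUILD_BUG_ON(NUM_CONTEXT_TAG <= 2 * EXECLIST_MAX_PORTS);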

>   
>   	struct rb_node uabi_node;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0ea2f4d7dc43..af79b8d70800 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -433,12 +433,8 @@ assert_priority_queue(const struct i915_request *prev,
>   static u64
>   lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   {
> -	struct i915_gem_context *ctx = ce->gem_context;
>   	u64 desc;
>   
> -	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> -	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
> -
>   	desc = INTEL_LEGACY_32B_CONTEXT;
>   	if (i915_vm_is_4lvl(ce->vm))
>   		desc = INTEL_LEGACY_64B_CONTEXT;
> @@ -456,20 +452,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   	 * anything below.
>   	 */
>   	if (INTEL_GEN(engine->i915) >= 11) {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
> -								/* bits 37-47 */
> -
>   		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
>   								/* bits 48-53 */
>   
> -		/* TODO: decide what to do with SW counter (bits 55-60) */
> -
>   		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
>   								/* bits 61-63 */
> -	} else {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
>   	}
>   
>   	return desc;
> @@ -978,6 +965,15 @@ __execlists_schedule_in(struct i915_request *rq)
>   
>   	intel_context_get(ce);
>   
> +	if (ce->tag) {
> +		ce->lrc_desc |= (u64)ce->tag << 32;
> +	} else {
> +		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
> +		ce->lrc_desc |=
> +			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
> +			GEN11_SW_CTX_ID_SHIFT;
> +	}

So the hw id is valid only while the context is in the ELSP, and it 
changes with every submission except in the OA case?

Shifts and masks look dodgy. For >= gen11 the current code has the hw_id 
in bits [37, 47], otherwise in [32, 52]. This patch does not seem to 
handle the differences between gens.
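
To illustrate, roughly the kind of thing I would expect is below - a 
hypothetical helper, not something in this patch, reusing the 
GEN8_CTX_ID_SHIFT / GEN11_SW_CTX_ID_SHIFT defines the removed code used:

	/* Hypothetical gen-aware placement of the tag in the descriptor. */
	static u64 tag_to_lrc_desc_bits(const struct intel_engine_cs *engine, u32 tag)
	{
		if (INTEL_GEN(engine->i915) >= 11)
			return (u64)tag << GEN11_SW_CTX_ID_SHIFT; /* bits 37-47 */
		else
			return (u64)tag << GEN8_CTX_ID_SHIFT;	  /* bits 32-52 */
	}

The clear mask in __execlists_schedule_in() would then also need to 
follow the same per-gen placement instead of hardcoding 
GENMASK_ULL(47, 37).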

> +
>   	intel_gt_pm_get(engine->gt);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
>   	intel_engine_context_in(engine);
> @@ -2224,7 +2220,6 @@ static void execlists_context_unpin(struct intel_context *ce)
>   	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
>   		      ce->engine);
>   
> -	i915_gem_context_unpin_hw_id(ce->gem_context);
>   	i915_gem_object_unpin_map(ce->state->obj);
>   	intel_ring_reset(ce->ring, ce->ring->tail);
>   }
> @@ -2274,18 +2269,12 @@ __execlists_context_pin(struct intel_context *ce,
>   		goto unpin_active;
>   	}
>   
> -	ret = i915_gem_context_pin_hw_id(ce->gem_context);
> -	if (ret)
> -		goto unpin_map;
> -
>   	ce->lrc_desc = lrc_descriptor(ce, engine);
>   	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>   	__execlists_update_reg_state(ce, engine);
>   
>   	return 0;
>   
> -unpin_map:
> -	i915_gem_object_unpin_map(ce->state->obj);
>   unpin_active:
>   	intel_context_active_release(ce);
>   err:
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 343d79c1cb7e..04a5a0d90823 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
>   	return sprintf(buf, "\n");
>   }
>   
> -static ssize_t
> -hw_id_show(struct device *dev, struct device_attribute *attr,
> -	   char *buf)
> -{
> -	struct mdev_device *mdev = mdev_from_dev(dev);
> -
> -	if (mdev) {
> -		struct intel_vgpu *vgpu = (struct intel_vgpu *)
> -			mdev_get_drvdata(mdev);
> -		return sprintf(buf, "%u\n",
> -			       vgpu->submission.shadow[0]->gem_context->hw_id);
> -	}
> -	return sprintf(buf, "\n");
> -}
> -
>   static DEVICE_ATTR_RO(vgpu_id);
> -static DEVICE_ATTR_RO(hw_id);
>   
>   static struct attribute *intel_vgpu_attrs[] = {
>   	&dev_attr_vgpu_id.attr,
> -	&dev_attr_hw_id.attr,
>   	NULL
>   };
>   
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index fd41505d52ec..8caaa446490f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1504,9 +1504,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		struct intel_context *ce;
>   
>   		seq_puts(m, "HW context ");
> -		if (!list_empty(&ctx->hw_id_link))
> -			seq_printf(m, "%x [pin %u]", ctx->hw_id,
> -				   atomic_read(&ctx->hw_id_pin_count));
>   		if (ctx->pid) {
>   			struct task_struct *task;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a86d28bda6dd..47239df653f2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
>   				const char *header,
>   				const struct drm_i915_error_context *ctx)
>   {
> -	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
> -		   header, ctx->comm, ctx->pid, ctx->hw_id,
> -		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
> +	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
> +		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
> +		   ctx->guilty, ctx->active);
>   }
>   
>   static void error_print_engine(struct drm_i915_error_state_buf *m,
> @@ -1262,7 +1262,6 @@ static bool record_context(struct drm_i915_error_context *e,
>   		rcu_read_unlock();
>   	}
>   
> -	e->hw_id = ctx->hw_id;
>   	e->sched_attr = ctx->sched;
>   	e->guilty = atomic_read(&ctx->guilty_count);
>   	e->active = atomic_read(&ctx->active_count);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index caaed5093d95..4dc36d6ee3a2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -117,7 +117,6 @@ struct i915_gpu_state {
>   		struct drm_i915_error_context {
>   			char comm[TASK_COMM_LEN];
>   			pid_t pid;
> -			u32 hw_id;
>   			int active;
>   			int guilty;
>   			struct i915_sched_attr sched_attr;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 80055501eccb..d36ba248d438 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		} else {
>   			stream->specific_ctx_id_mask =
>   				(1U << GEN8_CTX_ID_WIDTH) - 1;
> -			stream->specific_ctx_id =
> -				upper_32_bits(ce->lrc_desc);
> -			stream->specific_ctx_id &=
> -				stream->specific_ctx_id_mask;
> +			stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		}
>   		break;
>   
>   	case 11:
>   	case 12: {
>   		stream->specific_ctx_id_mask =
> -			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
> -			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
> -			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
> -		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
> -		stream->specific_ctx_id &=
> -			stream->specific_ctx_id_mask;
> +			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +		stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		break;
>   	}
>   
> @@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		MISSING_CASE(INTEL_GEN(i915));
>   	}
>   
> +	ce->tag = stream->specific_ctx_id_mask;
> +
>   	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
>   			 stream->specific_ctx_id,
>   			 stream->specific_ctx_id_mask);
> @@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
>   {
>   	struct intel_context *ce;
>   
> -	stream->specific_ctx_id = INVALID_CTX_ID;
> -	stream->specific_ctx_id_mask = 0;
> -
>   	ce = fetch_and_zero(&stream->pinned_ctx);
> -	if (ce)
> +	if (ce) {
> +		ce->tag = 0;
>   		intel_context_unpin(ce);
> +	}
> +
> +	stream->specific_ctx_id = INVALID_CTX_ID;
> +	stream->specific_ctx_id_mask = 0;

The OA hunks look dodgy as well. ce->tag is set to 
stream->specific_ctx_id_mask while I think it should be the id. 
Furthermore, the id is assigned from the mask, which is fixed and 
therefore not unique across contexts.
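
What I would have expected is roughly the below - only a sketch, with 
the reserved tag value picked arbitrarily and on the assumption that it 
is kept out of the engine->context_tag rotation:

	stream->specific_ctx_id_mask =
		((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
	/* One reserved tag, already shifted into its upper-dword position. */
	stream->specific_ctx_id =
		(NUM_CONTEXT_TAG - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);

	/* ce->tag is OR'ed into the upper 32 bits of lrc_desc on submission. */
	ce->tag = stream->specific_ctx_id;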

>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 24f2944da09d..1f2cf6cfafb5 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> -		      __entry->flags)
> +		      __entry->ctx, __entry->seqno, __entry->flags)
>   );
>   
>   DECLARE_EVENT_CLASS(i915_request,
> @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno)
> +		      __entry->ctx, __entry->seqno)
>   );
>   
>   DEFINE_EVENT(i915_request, i915_request_add,
> @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
>   			   __entry->port = port;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->prio, __entry->port)
>   );
>   
> @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
>   			   __entry->completed = i915_request_completed(rq);
>   			   ),
>   
> -		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
> +		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
>   			      __entry->dev, __entry->class, __entry->instance,
> -			      __entry->hw_id, __entry->ctx, __entry->seqno,
> -			      __entry->completed)
> +			      __entry->ctx, __entry->seqno, __entry->completed)
>   );
>   
>   #else
> @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   	     */
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->flags)
>   );
>   
> @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
>   	TP_STRUCT__entry(
>   			__field(u32, dev)
>   			__field(struct i915_gem_context *, ctx)
> -			__field(u32, hw_id)
>   			__field(struct i915_address_space *, vm)
>   	),
>   
>   	TP_fast_assign(
>   			__entry->dev = ctx->i915->drm.primary->index;
>   			__entry->ctx = ctx;
> -			__entry->hw_id = ctx->hw_id;
>   			__entry->vm = ctx->vm;
>   	),
>   
> -	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
> -		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
> +	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
> +		  __entry->dev, __entry->ctx, __entry->vm)
>   )
>   
>   DEFINE_EVENT(i915_context, i915_context_create,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 52d2df843148..f39f0282e78c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
>   			if (IS_ERR(rq)) {
>   				/* When full, fail_if_busy will trigger EBUSY */
>   				if (PTR_ERR(rq) != -EBUSY) {
> -					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
> -					       ctx->hw_id, engine->name,
> +					pr_err("Unexpected error from request alloc (on %s): %d\n",
> +					       engine->name,
>   					       (int)PTR_ERR(rq));
>   					err = PTR_ERR(rq);
>   				}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index 1c9db08f7c28..ac1ff558eb90 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
>   		}
>   
>   		nc = 0;
> -		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
> +		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
>   			for (; nc < num_ctx; nc++) {
>   				ctx = mock_context(i915, "mock");
>   				if (!ctx)
> 

Unless I am missing something, I see this patch as a simplification and 
not a fix. If we can get away with it in the GuC world then it's quite a 
big simplification, so it's fine by me.

Regards,

Tvrtko

