[PATCH 2/2] drm/amdgpu: rework ctx entity creation
zhoucm1
zhoucm1 at amd.com
Thu Aug 16 02:07:06 UTC 2018
On 2018-08-15 18:59, Christian König wrote:
> Use a fixed number of entities for each hardware IP.
>
> The number of compute entities is reduced to four, SDMA keeps its two
> entities and all other engines just expose one entity.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291 ++++++++++++++++----------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 30 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++--
> 3 files changed, 190 insertions(+), 167 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 0a6cd1202ee5..987b7f256463 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -27,8 +27,29 @@
> #include "amdgpu.h"
> #include "amdgpu_sched.h"
>
> -#define to_amdgpu_ctx_ring(e) \
> - container_of((e), struct amdgpu_ctx_ring, entity)
> +#define to_amdgpu_ctx_entity(e) \
> + container_of((e), struct amdgpu_ctx_entity, entity)
> +
> +const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> + [AMDGPU_HW_IP_GFX] = 1,
> + [AMDGPU_HW_IP_COMPUTE] = 4,
Could you explain why this was reduced to four? Otherwise the patch looks good to me.
Thanks,
David Zhou
> + [AMDGPU_HW_IP_DMA] = 2,
> + [AMDGPU_HW_IP_UVD] = 1,
> + [AMDGPU_HW_IP_VCE] = 1,
> + [AMDGPU_HW_IP_UVD_ENC] = 1,
> + [AMDGPU_HW_IP_VCN_DEC] = 1,
> + [AMDGPU_HW_IP_VCN_ENC] = 1,
> +};
> +
> +static int amdgput_ctx_total_num_entities(void)
> +{
> + unsigned i, num_entities = 0;
> +
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> + num_entities += amdgpu_ctx_num_entities[i];
> +
> + return num_entities;
> +}
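For reference, summing the table above gives 12 entities per context; the
designated initializer leaves AMDGPU_HW_IP_VCN_JPEG at zero, if I read it
right:

    /* 1 (GFX) + 4 (COMPUTE) + 2 (DMA) + 1 (UVD) + 1 (VCE) +
     * 1 (UVD_ENC) + 1 (VCN_DEC) + 1 (VCN_ENC) + 0 (VCN_JPEG) = 12
     */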
>
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> @@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> struct drm_file *filp,
> struct amdgpu_ctx *ctx)
> {
> - struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS];
> - struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS];
> - unsigned i, j, num_sdma_rqs, num_comp_rqs;
> + unsigned num_entities = amdgput_ctx_total_num_entities();
> + unsigned i, j;
> int r;
>
> if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> @@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>
> memset(ctx, 0, sizeof(*ctx));
> ctx->adev = adev;
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
> +
> + ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
> sizeof(struct dma_fence*), GFP_KERNEL);
> if (!ctx->fences)
> return -ENOMEM;
>
> - mutex_init(&ctx->lock);
> + ctx->entities[0] = kcalloc(num_entities,
> + sizeof(struct amdgpu_ctx_entity),
> + GFP_KERNEL);
> + if (!ctx->entities[0]) {
> + r = -ENOMEM;
> + goto error_free_fences;
> + }
>
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> - ctx->rings[i].sequence = 1;
> - ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
> + for (i = 0; i < num_entities; ++i) {
> + struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +
> + entity->sequence = 1;
> + entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
> }
> + for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> + ctx->entities[i] = ctx->entities[i - 1] +
> + amdgpu_ctx_num_entities[i - 1];
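Just to confirm I follow the new layout: entities[0] is the single flat
allocation, and the loop above only sets up per-IP offsets into it. A
sketch of what that yields with the table from this patch:

    /* ctx->entities[AMDGPU_HW_IP_GFX]     == &ctx->entities[0][0]
     * ctx->entities[AMDGPU_HW_IP_COMPUTE] == &ctx->entities[0][1]
     * ctx->entities[AMDGPU_HW_IP_DMA]     == &ctx->entities[0][5]
     * ctx->entities[AMDGPU_HW_IP_UVD]     == &ctx->entities[0][7]
     * ...
     * so ctx->entities[hw_ip][ring] picks one amdgpu_ctx_entity
     * out of the flat array.
     */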
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
>
> ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> ctx->reset_counter_query = ctx->reset_counter;
> @@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> ctx->init_priority = priority;
> ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>
> - num_sdma_rqs = 0;
> - num_comp_rqs = 0;
> - for (i = 0; i < adev->num_rings; i++) {
> - struct amdgpu_ring *ring = adev->rings[i];
> - struct drm_sched_rq *rq;
> -
> - rq = &ring->sched.sched_rq[priority];
> - if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
> - sdma_rqs[num_sdma_rqs++] = rq;
> - else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> - comp_rqs[num_comp_rqs++] = rq;
> - }
> -
> - /* create context entity for each ring */
> - for (i = 0; i < adev->num_rings; i++) {
> - struct amdgpu_ring *ring = adev->rings[i];
> + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> + struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
> + unsigned num_rings;
> +
> + switch (i) {
> + case AMDGPU_HW_IP_GFX:
> + rings[0] = &adev->gfx.gfx_ring[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_COMPUTE:
> + for (j = 0; j < adev->gfx.num_compute_rings; ++j)
> + rings[j] = &adev->gfx.compute_ring[j];
> + num_rings = adev->gfx.num_compute_rings;
> + break;
> + case AMDGPU_HW_IP_DMA:
> + for (j = 0; j < adev->sdma.num_instances; ++j)
> + rings[j] = &adev->sdma.instance[j].ring;
> + num_rings = adev->sdma.num_instances;
> + break;
> + case AMDGPU_HW_IP_UVD:
> + rings[0] = &adev->uvd.inst[0].ring;
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCE:
> + rings[0] = &adev->vce.ring[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_UVD_ENC:
> + rings[0] = &adev->uvd.inst[0].ring_enc[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCN_DEC:
> + rings[0] = &adev->vcn.ring_dec;
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCN_ENC:
> + rings[0] = &adev->vcn.ring_enc[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCN_JPEG:
> + rings[0] = &adev->vcn.ring_jpeg;
> + num_rings = 1;
> + break;
> + }
>
> - if (ring == &adev->gfx.kiq.ring)
> - continue;
> + for (j = 0; j < num_rings; ++j)
> + rqs[j] = &rings[j]->sched.sched_rq[priority];
>
> - if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
> - r = drm_sched_entity_init(&ctx->rings[i].entity,
> - sdma_rqs, num_sdma_rqs,
> - &ctx->guilty);
> - } else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> - r = drm_sched_entity_init(&ctx->rings[i].entity,
> - comp_rqs, num_comp_rqs,
> - &ctx->guilty);
> - } else {
> - struct drm_sched_rq *rq;
> -
> - rq = &ring->sched.sched_rq[priority];
> - r = drm_sched_entity_init(&ctx->rings[i].entity,
> - &rq, 1, &ctx->guilty);
> - }
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> + r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> + rqs, num_rings, &ctx->guilty);
> if (r)
> - goto failed;
> + goto error_cleanup_entities;
> }
>
> return 0;
>
> -failed:
> - for (j = 0; j < i; j++)
> - drm_sched_entity_destroy(&ctx->rings[j].entity);
> +error_cleanup_entities:
> + for (i = 0; i < num_entities; ++i)
> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> + kfree(ctx->entities[0]);
> +
> +error_free_fences:
> kfree(ctx->fences);
> ctx->fences = NULL;
> return r;
> @@ -137,17 +191,18 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> + unsigned num_entities = amdgput_ctx_total_num_entities();
> struct amdgpu_device *adev = ctx->adev;
> unsigned i, j;
>
> if (!adev)
> return;
>
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
> + for (i = 0; i < num_entities; ++i)
> for (j = 0; j < amdgpu_sched_jobs; ++j)
> - dma_fence_put(ctx->rings[i].fences[j]);
> + dma_fence_put(ctx->entities[0][i].fences[j]);
> kfree(ctx->fences);
> - ctx->fences = NULL;
> + kfree(ctx->entities[0]);
>
> mutex_destroy(&ctx->lock);
>
> @@ -157,9 +212,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> u32 ring, struct drm_sched_entity **entity)
> {
> - struct amdgpu_device *adev = ctx->adev;
> - unsigned num_rings = 0;
> - struct amdgpu_ring *out_ring;
> + if (hw_ip >= AMDGPU_HW_IP_NUM) {
> + DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
> + return -EINVAL;
> + }
>
> /* Right now all IPs have only one instance - multiple rings. */
> if (instance != 0) {
> @@ -167,52 +223,12 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> - switch (hw_ip) {
> - case AMDGPU_HW_IP_GFX:
> - out_ring = &adev->gfx.gfx_ring[ring];
> - num_rings = adev->gfx.num_gfx_rings;
> - break;
> - case AMDGPU_HW_IP_COMPUTE:
> - out_ring = &adev->gfx.compute_ring[ring];
> - num_rings = adev->gfx.num_compute_rings;
> - break;
> - case AMDGPU_HW_IP_DMA:
> - out_ring = &adev->sdma.instance[ring].ring;
> - num_rings = adev->sdma.num_instances;
> - break;
> - case AMDGPU_HW_IP_UVD:
> - out_ring = &adev->uvd.inst[0].ring;
> - num_rings = adev->uvd.num_uvd_inst;
> - break;
> - case AMDGPU_HW_IP_VCE:
> - out_ring = &adev->vce.ring[ring];
> - num_rings = adev->vce.num_rings;
> - break;
> - case AMDGPU_HW_IP_UVD_ENC:
> - out_ring = &adev->uvd.inst[0].ring_enc[ring];
> - num_rings = adev->uvd.num_enc_rings;
> - break;
> - case AMDGPU_HW_IP_VCN_DEC:
> - out_ring = &adev->vcn.ring_dec;
> - num_rings = 1;
> - break;
> - case AMDGPU_HW_IP_VCN_ENC:
> - out_ring = &adev->vcn.ring_enc[ring];
> - num_rings = adev->vcn.num_enc_rings;
> - break;
> - case AMDGPU_HW_IP_VCN_JPEG:
> - out_ring = &adev->vcn.ring_jpeg;
> - num_rings = 1;
> - break;
> - default:
> - DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
> + if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
> + DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
> return -EINVAL;
> }
>
> - if (ring > num_rings)
> - return -EINVAL;
> -
> - *entity = &ctx->rings[out_ring->idx].entity;
> + *entity = &ctx->entities[hw_ip][ring].entity;
> return 0;
> }
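With that, the lookup becomes a pure table walk. A quick usage sketch
(values assumed, not from the patch):

    struct drm_sched_entity *entity;
    int r;

    r = amdgpu_ctx_get_entity(ctx, AMDGPU_HW_IP_COMPUTE, 0, 2, &entity);
    /* r == 0, entity == &ctx->entities[AMDGPU_HW_IP_COMPUTE][2].entity */

    /* only two SDMA entities exist, so ring 2 is rejected */
    r = amdgpu_ctx_get_entity(ctx, AMDGPU_HW_IP_DMA, 0, 2, &entity);
    /* r == -EINVAL */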
>
> @@ -252,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> static void amdgpu_ctx_do_release(struct kref *ref)
> {
> struct amdgpu_ctx *ctx;
> + unsigned num_entities;
> u32 i;
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
>
> - for (i = 0; i < ctx->adev->num_rings; i++) {
> + num_entities = 0;
> + for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
> + num_entities += amdgpu_ctx_num_entities[i];
>
> - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
> - continue;
> -
> - drm_sched_entity_destroy(&ctx->rings[i].entity);
> - }
> + for (i = 0; i < num_entities; i++)
> + drm_sched_entity_destroy(&ctx->entities[0][i].entity);
>
> amdgpu_ctx_fini(ref);
> }
> @@ -422,21 +438,21 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
> struct drm_sched_entity *entity,
> struct dma_fence *fence, uint64_t* handle)
> {
> - struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
> - uint64_t seq = cring->sequence;
> + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
> + uint64_t seq = centity->sequence;
> struct dma_fence *other = NULL;
> unsigned idx = 0;
>
> idx = seq & (amdgpu_sched_jobs - 1);
> - other = cring->fences[idx];
> + other = centity->fences[idx];
> if (other)
> BUG_ON(!dma_fence_is_signaled(other));
>
> dma_fence_get(fence);
>
> spin_lock(&ctx->ring_lock);
> - cring->fences[idx] = fence;
> - cring->sequence++;
> + centity->fences[idx] = fence;
> + centity->sequence++;
> spin_unlock(&ctx->ring_lock);
>
> dma_fence_put(other);
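One note on the fence ring here: the indexing only works because
amdgpu_sched_jobs is enforced to be a power of two (32 by default, if I
remember right). Minimal illustration:

    /* amdgpu_sched_jobs == 32 */
    uint64_t seq = centity->sequence;              /* say, 100     */
    unsigned idx = seq & (amdgpu_sched_jobs - 1);  /* 100 % 32 = 4 */
    /* slot 4 still holds the fence of seq 68 (100 - 32); it must
     * already be signaled before being overwritten, which is what
     * the BUG_ON above checks. */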
> @@ -450,26 +466,26 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> struct drm_sched_entity *entity,
> uint64_t seq)
> {
> - struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
> + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
> struct dma_fence *fence;
>
> spin_lock(&ctx->ring_lock);
>
> if (seq == ~0ull)
> - seq = cring->sequence - 1;
> + seq = centity->sequence - 1;
>
> - if (seq >= cring->sequence) {
> + if (seq >= centity->sequence) {
> spin_unlock(&ctx->ring_lock);
> return ERR_PTR(-EINVAL);
> }
>
>
> - if (seq + amdgpu_sched_jobs < cring->sequence) {
> + if (seq + amdgpu_sched_jobs < centity->sequence) {
> spin_unlock(&ctx->ring_lock);
> return NULL;
> }
>
> - fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
> + fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
> spin_unlock(&ctx->ring_lock);
>
> return fence;
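So the lookup window follows directly from the sequence counter; with
amdgpu_sched_jobs == 32 and centity->sequence == 100 (numbers assumed):

    /* seq >= 100       -> ERR_PTR(-EINVAL), not submitted yet
     * seq + 32 < 100   -> NULL, slot already reused (seq <= 67)
     * 68 <= seq <= 99  -> fence from slot seq & 31
     */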
> @@ -478,23 +494,17 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> - int i;
> - struct amdgpu_device *adev = ctx->adev;
> - struct drm_sched_entity *entity;
> - struct amdgpu_ring *ring;
> + unsigned num_entities = amdgput_ctx_total_num_entities();
> enum drm_sched_priority ctx_prio;
> + unsigned i;
>
> ctx->override_priority = priority;
>
> ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> ctx->init_priority : ctx->override_priority;
>
> - for (i = 0; i < adev->num_rings; i++) {
> - ring = adev->rings[i];
> - entity = &ctx->rings[i].entity;
> -
> - if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
> - continue;
> + for (i = 0; i < num_entities; i++) {
> + struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
>
> drm_sched_entity_set_priority(entity, ctx_prio);
> }
> @@ -503,9 +513,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
> struct drm_sched_entity *entity)
> {
> - struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
> - unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
> - struct dma_fence *other = cring->fences[idx];
> + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
> + unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
> + struct dma_fence *other = centity->fences[idx];
>
> if (other) {
> signed long r;
> @@ -529,6 +539,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>
> void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
> {
> + unsigned num_entities = amdgput_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> uint32_t id, i;
> @@ -544,13 +555,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
> return;
> }
>
> - for (i = 0; i < ctx->adev->num_rings; i++) {
> + for (i = 0; i < num_entities; i++) {
> + struct drm_sched_entity *entity;
>
> - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
> - continue;
> -
> - max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
> - max_wait);
> + entity = &ctx->entities[0][i].entity;
> + max_wait = drm_sched_entity_flush(entity, max_wait);
> }
> }
> mutex_unlock(&mgr->lock);
> @@ -558,6 +567,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
>
> void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> {
> + unsigned num_entities = amdgput_ctx_total_num_entities();
> struct amdgpu_ctx *ctx;
> struct idr *idp;
> uint32_t id, i;
> @@ -569,16 +579,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> if (!ctx->adev)
> return;
>
> - for (i = 0; i < ctx->adev->num_rings; i++) {
> -
> - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
> - continue;
> -
> - if (kref_read(&ctx->refcount) == 1)
> - drm_sched_entity_fini(&ctx->rings[i].entity);
> - else
> - DRM_ERROR("ctx %p is still alive\n", ctx);
> + if (kref_read(&ctx->refcount) != 1) {
> + DRM_ERROR("ctx %p is still alive\n", ctx);
> + continue;
> }
> +
> + for (i = 0; i < num_entities; i++)
> + drm_sched_entity_fini(&ctx->entities[0][i].entity);
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 609f925b076c..d67c1d285a4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -29,26 +29,26 @@ struct drm_device;
> struct drm_file;
> struct amdgpu_fpriv;
>
> -struct amdgpu_ctx_ring {
> +struct amdgpu_ctx_entity {
> uint64_t sequence;
> struct dma_fence **fences;
> struct drm_sched_entity entity;
> };
>
> struct amdgpu_ctx {
> - struct kref refcount;
> - struct amdgpu_device *adev;
> - unsigned reset_counter;
> - unsigned reset_counter_query;
> - uint32_t vram_lost_counter;
> - spinlock_t ring_lock;
> - struct dma_fence **fences;
> - struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
> - bool preamble_presented;
> - enum drm_sched_priority init_priority;
> - enum drm_sched_priority override_priority;
> - struct mutex lock;
> - atomic_t guilty;
> + struct kref refcount;
> + struct amdgpu_device *adev;
> + unsigned reset_counter;
> + unsigned reset_counter_query;
> + uint32_t vram_lost_counter;
> + spinlock_t ring_lock;
> + struct dma_fence **fences;
> + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
> + bool preamble_presented;
> + enum drm_sched_priority init_priority;
> + enum drm_sched_priority override_priority;
> + struct mutex lock;
> + atomic_t guilty;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -58,6 +58,8 @@ struct amdgpu_ctx_mgr {
> struct idr ctx_handles;
> };
>
> +extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
> +
> struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
> int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 40fd591c9980..82177baade96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -270,7 +270,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> uint32_t ib_start_alignment = 0;
> uint32_t ib_size_alignment = 0;
> enum amd_ip_block_type type;
> - uint32_t ring_mask = 0;
> + unsigned int num_rings = 0;
> unsigned int i, j;
>
> if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
> @@ -280,21 +280,24 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> case AMDGPU_HW_IP_GFX:
> type = AMD_IP_BLOCK_TYPE_GFX;
> for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> - ring_mask |= adev->gfx.gfx_ring[i].ready << i;
> + if (adev->gfx.gfx_ring[i].ready)
> + ++num_rings;
> ib_start_alignment = 32;
> ib_size_alignment = 32;
> break;
> case AMDGPU_HW_IP_COMPUTE:
> type = AMD_IP_BLOCK_TYPE_GFX;
> for (i = 0; i < adev->gfx.num_compute_rings; i++)
> - ring_mask |= adev->gfx.compute_ring[i].ready << i;
> + if (adev->gfx.compute_ring[i].ready)
> + ++num_rings;
> ib_start_alignment = 32;
> ib_size_alignment = 32;
> break;
> case AMDGPU_HW_IP_DMA:
> type = AMD_IP_BLOCK_TYPE_SDMA;
> for (i = 0; i < adev->sdma.num_instances; i++)
> - ring_mask |= adev->sdma.instance[i].ring.ready << i;
> + if (adev->sdma.instance[i].ring.ready)
> + ++num_rings;
> ib_start_alignment = 256;
> ib_size_alignment = 4;
> break;
> @@ -303,7 +306,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
> if (adev->uvd.harvest_config & (1 << i))
> continue;
> - ring_mask |= adev->uvd.inst[i].ring.ready;
> +
> + if (adev->uvd.inst[i].ring.ready)
> + ++num_rings;
> }
> ib_start_alignment = 64;
> ib_size_alignment = 64;
> @@ -311,7 +316,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> case AMDGPU_HW_IP_VCE:
> type = AMD_IP_BLOCK_TYPE_VCE;
> for (i = 0; i < adev->vce.num_rings; i++)
> - ring_mask |= adev->vce.ring[i].ready << i;
> + if (adev->vce.ring[i].ready)
> + ++num_rings;
> ib_start_alignment = 4;
> ib_size_alignment = 1;
> break;
> @@ -320,28 +326,33 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
> if (adev->uvd.harvest_config & (1 << i))
> continue;
> +
> for (j = 0; j < adev->uvd.num_enc_rings; j++)
> - ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
> + if (adev->uvd.inst[i].ring_enc[j].ready)
> + ++num_rings;
> }
> ib_start_alignment = 64;
> ib_size_alignment = 64;
> break;
> case AMDGPU_HW_IP_VCN_DEC:
> type = AMD_IP_BLOCK_TYPE_VCN;
> - ring_mask = adev->vcn.ring_dec.ready;
> + if (adev->vcn.ring_dec.ready)
> + ++num_rings;
> ib_start_alignment = 16;
> ib_size_alignment = 16;
> break;
> case AMDGPU_HW_IP_VCN_ENC:
> type = AMD_IP_BLOCK_TYPE_VCN;
> for (i = 0; i < adev->vcn.num_enc_rings; i++)
> - ring_mask |= adev->vcn.ring_enc[i].ready << i;
> + if (adev->vcn.ring_enc[i].ready)
> + ++num_rings;
> ib_start_alignment = 64;
> ib_size_alignment = 1;
> break;
> case AMDGPU_HW_IP_VCN_JPEG:
> type = AMD_IP_BLOCK_TYPE_VCN;
> - ring_mask = adev->vcn.ring_jpeg.ready;
> + if (adev->vcn.ring_jpeg.ready)
> + ++num_rings;
> ib_start_alignment = 16;
> ib_size_alignment = 16;
> break;
> @@ -357,10 +368,13 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> if (i == adev->num_ip_blocks)
> return 0;
>
> + num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
> + num_rings);
> +
> result->hw_ip_version_major = adev->ip_blocks[i].version->major;
> result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> result->capabilities_flags = 0;
> - result->available_rings = ring_mask;
> + result->available_rings = (1 << num_rings) - 1;
> result->ib_start_alignment = ib_start_alignment;
> result->ib_size_alignment = ib_size_alignment;
> return 0;
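One userspace-visible detail worth spelling out: available_rings changes
from a sparse per-ring bitmask to a contiguous mask derived from the count
of ready rings, capped at the entity table. A small example (ready set
assumed):

    /* old: rings 0 and 2 ready -> ring_mask == 0b101              */
    /* new: num_rings == 2, so available_rings == (1 << 2) - 1
     *                                         == 0b011            */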