[PATCH 1/2] drm/amdgpu: individualize amdgpu entity allocation per HW IP

Tue Jan 21 23:34:49 UTC 2020

On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
> Do not allocate all the entity at once. This is required for
> dynamic amdgpu_ctx_entity creation.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 130 ++++++++++++------------
>  1 file changed, 65 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 05c2af61e7de..91638a2a5163 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -42,16 +42,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
>  	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
>  };
>  
> -static int amdgpu_ctx_total_num_entities(void)
> -{
> -	unsigned i, num_entities = 0;
> -
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		num_entities += amdgpu_ctx_num_entities[i];
> -
> -	return num_entities;
> -}
> -
>  static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>  				      enum drm_sched_priority priority)
>  {
> @@ -73,7 +63,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  			   struct drm_file *filp,
>  			   struct amdgpu_ctx *ctx)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	unsigned i, j;
>  	int r;
>  
> @@ -87,28 +76,29 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	memset(ctx, 0, sizeof(*ctx));
>  	ctx->adev = adev;
>  
> -
> -	ctx->entities[0] = kcalloc(num_entities,
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
>  				   sizeof(struct amdgpu_ctx_entity),
>  				   GFP_KERNEL);

Are these lines indented to the agument list column? They seem
that they are not...

> -	if (!ctx->entities[0])
> -		return -ENOMEM;
>  
> +		if (!ctx->entities[0]) {
> +			r =  -ENOMEM;
> +			goto error_cleanup_entity_memory;
> +		}
> +	}

Brake the paragraphs with an empty line, here.

> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {

Please use a brace for the outer for-loop, the i-counter.
This style leaves the ending row/column
empty for two levels of indentation.

>  
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -		entity->sequence = 1;
> -		entity->fences = kcalloc(amdgpu_sched_jobs,
> +			entity->sequence = 1;
> +			entity->fences = kcalloc(amdgpu_sched_jobs,
>  					 sizeof(struct dma_fence*), GFP_KERNEL);

The indentation of sizeof(... seems incorrect.

> -		if (!entity->fences) {
> -			r = -ENOMEM;
> -			goto error_cleanup_memory;
> +			if (!entity->fences) {
> +				r = -ENOMEM;
> +				goto error_cleanup_memory;
> +			}
>  		}
> -	}

This brace would stay...

> -	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
> -		ctx->entities[i] = ctx->entities[i - 1] +
> -			amdgpu_ctx_num_entities[i - 1];
>  
>  	kref_init(&ctx->refcount);
>  	spin_lock_init(&ctx->ring_lock);
> @@ -179,44 +169,52 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	return 0;
>  
>  error_cleanup_entities:

Notice this label sits after the "return 0;", so it really
is an "unroll" and "free" operation.

> -	for (i = 0; i < num_entities; ++i)
> -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>  
>  error_cleanup_memory:
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Add the brace back in for completeness and style.

> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -		kfree(entity->fences);
> -		entity->fences = NULL;
> +			kfree(entity->fences);
> +			entity->fences = NULL;
> +		}
> +
> +error_cleanup_entity_memory:

"cleanup" refers to something spilled, or something to be collected.
(Or winning in a wagered game of chance.) Also at "module_exit", maybe.

The kernel has traditionally used "unroll" and "free" for this. Now, since
you're unrolling the loop (notice that it sits after the "return 0;"), then
you can backtrack and name it like this:

Error_unroll_free1:
	for ( ; i >= 0; i--)
		free(my_array[i]);

> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		kfree(ctx->entities[i]);
> +		ctx->entities[i] = NULL;
>  	}
>  
> -	kfree(ctx->entities[0]);
> -	ctx->entities[0] = NULL;
>  	return r;
>  }
>  
>  static void amdgpu_ctx_fini(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_device *adev = ctx->adev;
> -	unsigned i, j;
> +	unsigned i, j, k;
>  
>  	if (!adev)
>  		return;
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
>  
> -	for (i = 0; i < num_entities; ++i) {
> -		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
> +			for (k = 0; k < amdgpu_sched_jobs; ++k)
> +				dma_fence_put(entity->fences[k]);
>  
> -		for (j = 0; j < amdgpu_sched_jobs; ++j)
> -			dma_fence_put(entity->fences[j]);
> +			kfree(entity->fences);
> +		}
>  
> -		kfree(entity->fences);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> +		kfree(ctx->entities[i]);
> +		ctx->entities[i] = NULL;
>  	}
>  
> -	kfree(ctx->entities[0]);
>  	mutex_destroy(&ctx->lock);
> -
>  	kfree(ctx);
>  }
>  
> @@ -279,14 +277,12 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>  static void amdgpu_ctx_do_release(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx;
> -	unsigned num_entities;
> -	u32 i;
> +	u32 i, j;
>  
>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
> -
> -	num_entities = amdgpu_ctx_total_num_entities();
> -	for (i = 0; i < num_entities; i++)
> -		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
>  
>  	amdgpu_ctx_fini(ref);
>  }
> @@ -516,20 +512,21 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  				  enum drm_sched_priority priority)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	enum drm_sched_priority ctx_prio;
> -	unsigned i;
> +	unsigned i, j;
>  
>  	ctx->override_priority = priority;
>  
>  	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
>  			ctx->init_priority : ctx->override_priority;
> +	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  
> -	for (i = 0; i < num_entities; i++) {
> -		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
> +			struct drm_sched_entity *entity =
> +				&ctx->entities[i][j].entity;
>  
> -		drm_sched_entity_set_priority(entity, ctx_prio);
> -	}
> +			drm_sched_entity_set_priority(entity, ctx_prio);
> +		}
	} ;-)
>  }
>  
>  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
> @@ -564,20 +561,20 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>  
>  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_ctx *ctx;
>  	struct idr *idp;
> -	uint32_t id, i;
> +	uint32_t id, i, j;
>  
>  	idp = &mgr->ctx_handles;
>  
>  	mutex_lock(&mgr->lock);
>  	idr_for_each_entry(idp, ctx, id) {
> -		for (i = 0; i < num_entities; i++) {
> -			struct drm_sched_entity *entity;
> +		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +				struct drm_sched_entity *entity;
>  
> -			entity = &ctx->entities[0][i].entity;
> -			timeout = drm_sched_entity_flush(entity, timeout);
> +				entity = &ctx->entities[i][j].entity;
> +				timeout = drm_sched_entity_flush(entity, timeout);
>  		}
>  	}
>  	mutex_unlock(&mgr->lock);
> @@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  
>  void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  {
> -	unsigned num_entities = amdgpu_ctx_total_num_entities();
>  	struct amdgpu_ctx *ctx;
>  	struct idr *idp;
> -	uint32_t id, i;
> +	uint32_t id, i, j;
>  
>  	idp = &mgr->ctx_handles;
>  
> @@ -598,9 +594,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  			DRM_ERROR("ctx %p is still alive\n", ctx);
>  			continue;
>  		}
> +		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)

Brace.

> +			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +				struct drm_sched_entity *entity;
>  
> -		for (i = 0; i < num_entities; i++)
> -			drm_sched_entity_fini(&ctx->entities[0][i].entity);
> +				entity = &ctx->entities[i][j].entity;
> +				drm_sched_entity_fini(entity);
> +			}
>  	}
>  }
>  
> 

Regards,
Luben