[PATCH 2/2] drm/amdgpu: allocate entities on demand
Christian König
christian.koenig at amd.com
Wed Jan 22 11:39:55 UTC 2020
On 22.01.20 10:33, Nirmoy Das wrote:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation, some of which might never be used.
>
> This patch resolves that entity/fence wastage by creating the entities
> for a HW IP only when they are required.
>
> v2: consolidated priority checking in amdgpu_ctx_priority_permit()
Well, that is not really what I had in mind. You see, almost all
applications use only the first instance of a hw_ip.
So what we should probably do is allocate individual entities on demand
instead of whole hw_ips at once.
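To illustrate, here is only a rough, untested sketch of the direction I
mean, not a finished implementation. It assumes ctx->entities becomes an
array of per-entity pointers (the AMDGPU_MAX_ENTITY_NUM bound is made up
here) and that the fence ring moves into the entity as a flexible array
member; neither exists in the patch below:

struct amdgpu_ctx_entity {
	uint64_t		sequence;
	struct drm_sched_entity	entity;
	struct dma_fence	*fences[];
};

/* in struct amdgpu_ctx: one pointer per entity instead of
 * one fence/entity array per hw_ip
 */
struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct amdgpu_device *adev = ctx->adev;
	struct drm_gpu_scheduler **scheds = NULL;
	struct drm_gpu_scheduler *sched = NULL;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority priority;
	unsigned int num_scheds = 0;
	int r;

	/* Allocate exactly the one entity userspace asked for, with its
	 * amdgpu_sched_jobs fence slots in the same allocation.
	 */
	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	entity->sequence = 1;
	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
		sched = &adev->gfx.gfx_ring[0].sched;
		scheds = &sched;
		num_scheds = 1;
		break;
	/* ... the remaining hw_ip cases exactly as in the patch ... */
	}

	r = drm_sched_entity_init(&entity->entity, priority, scheds,
				  num_scheds, &ctx->guilty);
	if (r) {
		kfree(entity);
		return r;
	}

	ctx->entities[hw_ip][ring] = entity;
	return 0;
}

amdgpu_ctx_get_entity() would then call this only when
ctx->entities[hw_ip][ring] is still NULL and propagate the error code,
so rings which are never used by an application never cost us anything.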
Do you know what I mean?
Regards,
Christian.
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 +++++++++++++-----------
> 1 file changed, 87 insertions(+), 73 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index eecbd68db986..d704e1bebb1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -45,6 +45,9 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
> static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> enum drm_sched_priority priority)
> {
> + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> + return -EINVAL;
> +
> /* NORMAL and below are accessible by everyone */
> if (priority <= DRM_SCHED_PRIORITY_NORMAL)
> return 0;
> @@ -58,65 +61,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> - enum drm_sched_priority priority,
> - struct drm_file *filp,
> - struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip)
> {
> - unsigned i, j;
> - int r;
> + struct amdgpu_device *adev = ctx->adev;
> + struct drm_gpu_scheduler **scheds;
> + struct drm_gpu_scheduler *sched;
> + unsigned num_scheds = 0;
> + enum drm_sched_priority priority;
> + int j, r;
>
> - if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> - return -EINVAL;
> + ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
> + sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
>
> - r = amdgpu_ctx_priority_permit(filp, priority);
> - if (r)
> - return r;
> + if (!ctx->entities[hw_ip])
> + return -ENOMEM;
>
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->adev = adev;
> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
> - sizeof(struct amdgpu_ctx_entity),
> - GFP_KERNEL);
> + struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>
> - if (!ctx->entities[0]) {
> - r = -ENOMEM;
> - goto error_cleanup_entity_memory;
> + entity->sequence = 1;
> + entity->fences = kcalloc(amdgpu_sched_jobs,
> + sizeof(struct dma_fence*), GFP_KERNEL);
> + if (!entity->fences) {
> + r = -ENOMEM;
> + goto error_cleanup_memory;
> }
> }
>
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> -
> - entity->sequence = 1;
> - entity->fences = kcalloc(amdgpu_sched_jobs,
> - sizeof(struct dma_fence*), GFP_KERNEL);
> - if (!entity->fences) {
> - r = -ENOMEM;
> - goto error_cleanup_memory;
> - }
> - }
> - }
> -
> - kref_init(&ctx->refcount);
> - spin_lock_init(&ctx->ring_lock);
> - mutex_init(&ctx->lock);
> -
> - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> - ctx->reset_counter_query = ctx->reset_counter;
> - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> - ctx->init_priority = priority;
> - ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct drm_gpu_scheduler **scheds;
> - struct drm_gpu_scheduler *sched;
> - unsigned num_scheds = 0;
> -
> - switch (i) {
> + priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> + ctx->init_priority : ctx->override_priority;
> + switch (hw_ip) {
> case AMDGPU_HW_IP_GFX:
> sched = &adev->gfx.gfx_ring[0].sched;
> scheds = &sched;
> @@ -157,12 +132,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> scheds = adev->jpeg.jpeg_sched;
> num_scheds = adev->jpeg.num_jpeg_sched;
> break;
> - }
> + }
>
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, scheds,
> - num_scheds, &ctx->guilty);
> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> + r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
> + priority, scheds,
> + num_scheds, &ctx->guilty);
> if (r)
> goto error_cleanup_entities;
> }
> @@ -170,30 +145,51 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> return 0;
>
> error_cleanup_entities:
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> - }
> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
> + drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
>
> error_cleanup_memory:
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> + for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> + struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>
> - kfree(entity->fences);
> - entity->fences = NULL;
> - }
> + kfree(entity->fences);
> + entity->fences = NULL;
> }
>
> -error_cleanup_entity_memory:
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - kfree(ctx->entities[i]);
> - ctx->entities[i] = NULL;
> - }
> + kfree(ctx->entities[hw_ip]);
> + ctx->entities[hw_ip] = NULL;
>
> return r;
> }
>
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> + enum drm_sched_priority priority,
> + struct drm_file *filp,
> + struct amdgpu_ctx *ctx)
> +{
> + int r;
> +
> + r = amdgpu_ctx_priority_permit(filp, priority);
> + if (r)
> + return r;
> +
> + memset(ctx, 0, sizeof(*ctx));
> + ctx->adev = adev;
> +
> + kref_init(&ctx->refcount);
> + spin_lock_init(&ctx->ring_lock);
> + mutex_init(&ctx->lock);
> +
> + ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> + ctx->reset_counter_query = ctx->reset_counter;
> + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> + ctx->init_priority = priority;
> + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> + return 0;
> +
> +}
> +
> static void amdgpu_ctx_fini(struct kref *ref)
> {
> struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> @@ -204,7 +200,14 @@ static void amdgpu_ctx_fini(struct kref *ref)
> return;
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> - struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> + struct amdgpu_ctx_entity *entity;
> +
> + if (!ctx->entities[i])
> + continue;
> +
> + entity = &ctx->entities[i][j];
> + if (!entity->fences)
> + continue;
>
> for (k = 0; k < amdgpu_sched_jobs; ++k)
> dma_fence_put(entity->fences[k]);
> @@ -241,6 +244,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> +	if (!ctx->entities[hw_ip]) {
> +		int r = amdgpu_ctx_init_entity(ctx, hw_ip);
> +
> +		if (r)
> +			return r;
> +	}
> +
> *entity = &ctx->entities[hw_ip][ring].entity;
> return 0;
> }
> @@ -285,8 +291,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>
> ctx = container_of(ref, struct amdgpu_ctx, refcount);
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> + if (!ctx->entities[i])
> + continue;
> drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> + }
> }
>
> amdgpu_ctx_fini(ref);
> @@ -579,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> struct drm_sched_entity *entity;
>
> + if (!ctx->entities[i])
> + continue;
> +
> entity = &ctx->entities[i][j].entity;
> timeout = drm_sched_entity_flush(entity, timeout);
> }
> @@ -601,11 +613,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
> DRM_ERROR("ctx %p is still alive\n", ctx);
> continue;
> }
> -
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> struct drm_sched_entity *entity;
>
> + if (!ctx->entities[i])
> + continue;
> +
> entity = &ctx->entities[i][j].entity;
> drm_sched_entity_fini(entity);
> }