[PATCH 2/2] drm/amdgpu: allocate entities on demand
Nirmoy Das
nirmoy.aiemd at gmail.com
Wed Jan 22 09:33:06 UTC 2020
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, and some of them might never be used.
This patch avoids that waste by creating the entities and fences for a
HW IP only when that IP is actually required.
v2: consolidated the priority check in amdgpu_ctx_priority_permit()
Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 +++++++++++++-----------
1 file changed, 87 insertions(+), 73 deletions(-)
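
For readers skimming the diff, here is a minimal sketch of the lazy-allocation
pattern the patch applies (illustrative userspace C only; all names below are
made up and do not match the amdgpu API): each per-IP entity array stays NULL
until the first submission targets that HW IP, and only then are the entities
and their fence rings allocated.

/*
 * Illustrative sketch only: plain userspace C with simplified names,
 * not the kernel or amdgpu API.
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#define HW_IP_NUM   9	/* stand-in for AMDGPU_HW_IP_NUM */
#define SCHED_JOBS 16	/* stand-in for amdgpu_sched_jobs */

/* Rough stand-in for amdgpu_ctx_num_entities[]; values are illustrative. */
static const unsigned int num_entities[HW_IP_NUM] = { 1, 4, 2, 1, 1, 1, 1, 1, 1 };

struct ctx_entity {
	void **fences;		/* one slot per outstanding job */
	uint64_t sequence;
};

struct ctx {
	/* NULL per HW IP until user space first submits to that IP. */
	struct ctx_entity *entities[HW_IP_NUM];
};

static int ctx_init_entities(struct ctx *ctx, uint32_t hw_ip)
{
	unsigned int i, count = num_entities[hw_ip];
	struct ctx_entity *e = calloc(count, sizeof(*e));

	if (!e)
		return -ENOMEM;

	for (i = 0; i < count; ++i) {
		e[i].sequence = 1;
		e[i].fences = calloc(SCHED_JOBS, sizeof(*e[i].fences));
		if (!e[i].fences)
			goto err_free;
	}
	ctx->entities[hw_ip] = e;
	return 0;

err_free:
	while (i--)
		free(e[i].fences);
	free(e);
	return -ENOMEM;
}

static int ctx_get_entity(struct ctx *ctx, uint32_t hw_ip, uint32_t ring,
			  struct ctx_entity **entity)
{
	if (hw_ip >= HW_IP_NUM || ring >= num_entities[hw_ip])
		return -EINVAL;

	/* Allocate on first use; later lookups just index the array. */
	if (!ctx->entities[hw_ip]) {
		int r = ctx_init_entities(ctx, hw_ip);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring];
	return 0;
}

Checking the return value of the on-demand init before dereferencing the
freshly allocated array keeps the first-use path safe when the allocation
fails.
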
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index eecbd68db986..d704e1bebb1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -45,6 +45,9 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -58,65 +61,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip)
{
- unsigned i, j;
- int r;
+ struct amdgpu_device *adev = ctx->adev;
+ struct drm_gpu_scheduler **scheds;
+ struct drm_gpu_scheduler *sched;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
+ int j, r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
+ ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+ sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
+ if (!ctx->entities[hw_ip])
+ return -ENOMEM;
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
+ struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
- if (!ctx->entities[0]) {
- r = -ENOMEM;
- goto error_cleanup_entity_memory;
+ entity->sequence = 1;
+ entity->fences = kcalloc(amdgpu_sched_jobs,
+ sizeof(struct dma_fence*), GFP_KERNEL);
+ if (!entity->fences) {
+ r = -ENOMEM;
+ goto error_cleanup_memory;
}
}
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
- }
- }
-
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
-
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
-
- switch (i) {
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -157,12 +132,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
+ }
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
if (r)
goto error_cleanup_entities;
}
@@ -170,30 +145,51 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
return 0;
error_cleanup_entities:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- drm_sched_entity_destroy(&ctx->entities[i][j].entity);
- }
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
+ drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
error_cleanup_memory:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+ struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ kfree(entity->fences);
+ entity->fences = NULL;
}
-error_cleanup_entity_memory:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- kfree(ctx->entities[i]);
- ctx->entities[i] = NULL;
- }
+ kfree(ctx->entities[hw_ip]);
+ ctx->entities[hw_ip] = NULL;
return r;
}
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
+
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
+
+}
+
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
@@ -204,7 +200,14 @@ static void amdgpu_ctx_fini(struct kref *ref)
return;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i])
+ continue;
+
+ entity = &ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
for (k = 0; k < amdgpu_sched_jobs; ++k)
dma_fence_put(entity->fences[k]);
@@ -241,6 +244,9 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
+ if (ctx->entities[hw_ip] == NULL)
+ amdgpu_ctx_init_entity(ctx, hw_ip);
+
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
}
@@ -285,8 +291,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
ctx = container_of(ref, struct amdgpu_ctx, refcount);
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i])
+ continue;
drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+ }
}
amdgpu_ctx_fini(ref);
@@ -579,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
struct drm_sched_entity *entity;
+ if (!ctx->entities[i])
+ continue;
+
entity = &ctx->entities[i][j].entity;
timeout = drm_sched_entity_flush(entity, timeout);
}
@@ -601,11 +613,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
DRM_ERROR("ctx %p is still alive\n", ctx);
continue;
}
-
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
struct drm_sched_entity *entity;
+ if (!ctx->entities[i])
+ continue;
+
entity = &ctx->entities[i][j].entity;
drm_sched_entity_fini(entity);
}
--
2.24.1