[PATCH 2/2] drm/amdgpu: allocate entities on demand
Nirmoy Das
nirmoy.aiemd at gmail.com
Wed Jan 22 09:33:06 UTC 2020
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, and some of them might never be used.
This patch avoids that waste by creating the entities and fences for a
HW IP only when that IP is actually required.
v2: consolidated the priority check in amdgpu_ctx_priority_permit()
Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 160 +++++++++++++-----------
1 file changed, 87 insertions(+), 73 deletions(-)
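
For readers skimming the diff, here is a minimal sketch of the lazy-allocation
pattern the patch applies (illustrative userspace C only; all names below are
made up and do not match the amdgpu API): each per-IP entity array stays NULL
until the first submission targets that HW IP, and only then are the entities
and their fence rings allocated.

/*
 * Illustrative sketch only: plain userspace C with simplified names,
 * not the kernel or amdgpu API.
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#define HW_IP_NUM   9	/* stand-in for AMDGPU_HW_IP_NUM */
#define SCHED_JOBS 16	/* stand-in for amdgpu_sched_jobs */

/* Rough stand-in for amdgpu_ctx_num_entities[]; values are illustrative. */
static const unsigned int num_entities[HW_IP_NUM] = { 1, 4, 2, 1, 1, 1, 1, 1, 1 };

struct ctx_entity {
	void **fences;		/* one slot per outstanding job */
	uint64_t sequence;
};

struct ctx {
	/* NULL per HW IP until user space first submits to that IP. */
	struct ctx_entity *entities[HW_IP_NUM];
};

static int ctx_init_entities(struct ctx *ctx, uint32_t hw_ip)
{
	unsigned int i, count = num_entities[hw_ip];
	struct ctx_entity *e = calloc(count, sizeof(*e));

	if (!e)
		return -ENOMEM;

	for (i = 0; i < count; ++i) {
		e[i].sequence = 1;
		e[i].fences = calloc(SCHED_JOBS, sizeof(*e[i].fences));
		if (!e[i].fences)
			goto err_free;
	}
	ctx->entities[hw_ip] = e;
	return 0;

err_free:
	while (i--)
		free(e[i].fences);
	free(e);
	return -ENOMEM;
}

static int ctx_get_entity(struct ctx *ctx, uint32_t hw_ip, uint32_t ring,
			  struct ctx_entity **entity)
{
	if (hw_ip >= HW_IP_NUM || ring >= num_entities[hw_ip])
		return -EINVAL;

	/* Allocate on first use; later lookups just index the array. */
	if (!ctx->entities[hw_ip]) {
		int r = ctx_init_entities(ctx, hw_ip);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring];
	return 0;
}

Checking the return value of the on-demand init before dereferencing the
freshly allocated array keeps the first-use path safe when the allocation
fails.
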
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index eecbd68db986..d704e1bebb1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -45,6 +45,9 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -58,65 +61,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
- enum drm_sched_priority priority,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip)
{
- unsigned i, j;
- int r;
+ struct amdgpu_device *adev = ctx->adev;
+ struct drm_gpu_scheduler **scheds;
+ struct drm_gpu_scheduler *sched;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
+ int j, r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
+ ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
+ sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
+ if (!ctx->entities[hw_ip])
+ return -ENOMEM;
- memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
+ struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
- if (!ctx->entities[0]) {
- r = -ENOMEM;
- goto error_cleanup_entity_memory;
+ entity->sequence = 1;
+ entity->fences = kcalloc(amdgpu_sched_jobs,
+ sizeof(struct dma_fence*), GFP_KERNEL);
+ if (!entity->fences) {
+ r = -ENOMEM;
+ goto error_cleanup_memory;
}
}
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
-
- entity->sequence = 1;
- entity->fences = kcalloc(amdgpu_sched_jobs,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!entity->fences) {
- r = -ENOMEM;
- goto error_cleanup_memory;
- }
- }
- }
-
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
-
- ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct drm_gpu_scheduler **scheds;
- struct drm_gpu_scheduler *sched;
- unsigned num_scheds = 0;
-
- switch (i) {
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
sched = &adev->gfx.gfx_ring[0].sched;
scheds = &sched;
@@ -157,12 +132,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
scheds = adev->jpeg.jpeg_sched;
num_scheds = adev->jpeg.num_jpeg_sched;
break;
- }
+ }
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- priority, scheds,
- num_scheds, &ctx->guilty);
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
+ priority, scheds,
+ num_scheds, &ctx->guilty);
if (r)
goto error_cleanup_entities;
}
@@ -170,30 +145,51 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
return 0;
error_cleanup_entities:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- drm_sched_entity_destroy(&ctx->entities[i][j].entity);
- }
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
+ drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
error_cleanup_memory:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+ for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
+ struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
- kfree(entity->fences);
- entity->fences = NULL;
- }
+ kfree(entity->fences);
+ entity->fences = NULL;
}
-error_cleanup_entity_memory:
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- kfree(ctx->entities[i]);
- ctx->entities[i] = NULL;
- }
+ kfree(ctx->entities[hw_ip]);
+ ctx->entities[hw_ip] = NULL;
return r;
}
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+ enum drm_sched_priority priority,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx)
+{
+ int r;
+
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->adev = adev;
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+ return 0;
+
+}
+
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
@@ -204,7 +200,14 @@ static void amdgpu_ctx_fini(struct kref *ref)
return;
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
+ struct amdgpu_ctx_entity *entity;
+
+ if (!ctx->entities[i])
+ continue;
+
+ entity = &ctx->entities[i][j];
+ if (!entity->fences)
+ continue;
for (k = 0; k < amdgpu_sched_jobs; ++k)
dma_fence_put(entity->fences[k]);
@@ -241,6 +244,9 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
+ if (ctx->entities[hw_ip] == NULL)
+ amdgpu_ctx_init_entity(ctx, hw_ip);
+
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
}
@@ -285,8 +291,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
ctx = container_of(ref, struct amdgpu_ctx, refcount);
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i])
+ continue;
drm_sched_entity_destroy(&ctx->entities[i][j].entity);
+ }
}
amdgpu_ctx_fini(ref);
@@ -579,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
struct drm_sched_entity *entity;
+ if (!ctx->entities[i])
+ continue;
+
entity = &ctx->entities[i][j].entity;
timeout = drm_sched_entity_flush(entity, timeout);
}
@@ -601,11 +613,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
DRM_ERROR("ctx %p is still alive\n", ctx);
continue;
}
-
for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
struct drm_sched_entity *entity;
+ if (!ctx->entities[i])
+ continue;
+
entity = &ctx->entities[i][j].entity;
drm_sched_entity_fini(entity);
}
--
2.24.1