[RFC PATCH] drm/amdgpu: allocate entities on demand
Nirmoy Das
nirmoy.aiemd at gmail.com
Mon Nov 25 23:17:19 UTC 2019
Currently we pre-allocate entities for all the HW IPs on
context creation, some of which might never be used.
This patch tries to resolve entity wastage by creating entities
for a HW IP only when it is required.
Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 142 ++++++++++++++----------
1 file changed, 81 insertions(+), 61 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a0d3d7b756eb..0a390ebe873f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct amdgpu_ctx *ctx)
{
unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j, k;
+ unsigned i;
int r;
if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -103,7 +103,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
for (i = 0; i < num_entities; ++i) {
struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
- entity->sequence = 1;
+ entity->sequence = -1;
entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
@@ -120,25 +120,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
- struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
- unsigned num_rings = 0;
- unsigned num_rqs = 0;
+ return 0;
+
+error_free_fences:
+ kfree(ctx->fences);
+ ctx->fences = NULL;
+ return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+ struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ unsigned num_entities = amdgpu_ctx_total_num_entities();
+ struct amdgpu_device *adev = ctx->adev;
+ unsigned i, j;
+
+ if (!adev)
+ return;
+
+ for (i = 0; i < num_entities; ++i)
+ for (j = 0; j < amdgpu_sched_jobs; ++j)
+ dma_fence_put(ctx->entities[0][i].fences[j]);
+ kfree(ctx->fences);
+ kfree(ctx->entities[0]);
+
+ mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
+}
- switch (i) {
+
+int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+ struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+ struct amdgpu_device *adev = ctx->adev;
+ unsigned num_rings = 0;
+ unsigned num_rqs = 0;
+ unsigned i, j;
+ int r = 0;
+
+ switch (hw_ip) {
case AMDGPU_HW_IP_GFX:
rings[0] = &adev->gfx.gfx_ring[0];
num_rings = 1;
break;
case AMDGPU_HW_IP_COMPUTE:
- for (j = 0; j < adev->gfx.num_compute_rings; ++j)
- rings[j] = &adev->gfx.compute_ring[j];
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+ rings[i] = &adev->gfx.compute_ring[i];
num_rings = adev->gfx.num_compute_rings;
break;
case AMDGPU_HW_IP_DMA:
- for (j = 0; j < adev->sdma.num_instances; ++j)
- rings[j] = &adev->sdma.instance[j].ring;
+ for (i = 0; i < adev->sdma.num_instances; ++i)
+ rings[i] = &adev->sdma.instance[i].ring;
num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
@@ -154,80 +188,59 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
num_rings = 1;
break;
case AMDGPU_HW_IP_VCN_DEC:
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
+ rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
}
break;
case AMDGPU_HW_IP_VCN_ENC:
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- for (k = 0; k < adev->vcn.num_enc_rings; ++k)
- rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+ rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
}
break;
case AMDGPU_HW_IP_VCN_JPEG:
- for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
continue;
- rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
+ rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
}
break;
- }
-
- for (j = 0; j < num_rings; ++j) {
- if (!rings[j]->adev)
- continue;
+ }
- rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
- }
+ for (i = 0; i < num_rings; ++i) {
+ if (!rings[i]->adev)
+ continue;
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rqs, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
+ rqs[num_rqs++] = &rings[i]->sched.sched_rq[ctx->init_priority];
}
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+ r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity,
+ rqs, num_rqs, &ctx->guilty);
+ if (r)
+ goto error_cleanup_entities;
+
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+ ctx->entities[hw_ip][i].sequence = 1;
+
return 0;
error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
- kfree(ctx->entities[0]);
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
+ drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
-error_free_fences:
- kfree(ctx->fences);
- ctx->fences = NULL;
return r;
}
-static void amdgpu_ctx_fini(struct kref *ref)
-{
- struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- struct amdgpu_device *adev = ctx->adev;
- unsigned i, j;
-
- if (!adev)
- return;
-
- for (i = 0; i < num_entities; ++i)
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->entities[0][i].fences[j]);
- kfree(ctx->fences);
- kfree(ctx->entities[0]);
-
- mutex_destroy(&ctx->lock);
-
- kfree(ctx);
-}
-
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
+ int r;
+
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
@@ -244,6 +257,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
+ if (ctx->entities[hw_ip][ring].sequence == -1) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip);
+
+ if (r)
+ return r;
+ }
+
*entity = &ctx->entities[hw_ip][ring].entity;
return 0;
}
--
2.23.0
More information about the amd-gfx
mailing list