[PATCH 3/4] drm/amdgpu: allocate entities on demand
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Dec 6 19:40:24 UTC 2019
On 06.12.19 at 18:33, Nirmoy Das wrote:
> Currently we pre-allocate entities for all the HW IPs on
> context creation, even though some of them might never be used.
>
> This patch avoids that waste by creating the entities for
> a HW IP only when they are first needed.
Please delay that until we have fully cleaned up the scheduler
initialization.
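
For anyone skimming the series: what the commit message describes is plain
lazy initialization keyed on a sentinel value. A minimal sketch of the
pattern (generic C with made-up names, not the amdgpu code):

struct lazy_entity {
	int initialized;	/* the patch uses sequence == -1 as its marker */
};

/* stand-in for the expensive drm_sched_entity_init() call */
static int expensive_init(struct lazy_entity *e)
{
	e->initialized = 1;
	return 0;
}

/* callers never initialize explicitly; the first use pays the cost */
static int get_entity(struct lazy_entity *e)
{
	if (!e->initialized) {
		int r = expensive_init(e);

		if (r)
			return r;
	}
	return 0;
}

That is exactly the shape of the amdgpu_ctx_get_entity() change below.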
Christian.
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 176 +++++++++++++-----------
> 1 file changed, 97 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 1d6850af9908..c7643af8827f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -68,13 +68,99 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
> return -EACCES;
> }
>
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)
> +{
> + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> + struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
> + struct amdgpu_device *adev = ctx->adev;
> + unsigned num_rings = 0;
> + unsigned num_scheds = 0;
> + unsigned i, j;
> + int r = 0;
> +
> + switch (hw_ip) {
> + case AMDGPU_HW_IP_GFX:
> + rings[0] = &adev->gfx.gfx_ring[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_COMPUTE:
> + for (i = 0; i < adev->gfx.num_compute_rings; ++i)
> + rings[i] = &adev->gfx.compute_ring[i];
> + num_rings = adev->gfx.num_compute_rings;
> + break;
> + case AMDGPU_HW_IP_DMA:
> + for (i = 0; i < adev->sdma.num_instances; ++i)
> + rings[i] = &adev->sdma.instance[i].ring;
> + num_rings = adev->sdma.num_instances;
> + break;
> + case AMDGPU_HW_IP_UVD:
> + rings[0] = &adev->uvd.inst[0].ring;
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCE:
> + rings[0] = &adev->vce.ring[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_UVD_ENC:
> + rings[0] = &adev->uvd.inst[0].ring_enc[0];
> + num_rings = 1;
> + break;
> + case AMDGPU_HW_IP_VCN_DEC:
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + if (adev->vcn.harvest_config & (1 << i))
> + continue;
> + rings[num_rings++] = &adev->vcn.inst[i].ring_dec;
> + }
> + break;
> + case AMDGPU_HW_IP_VCN_ENC:
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + if (adev->vcn.harvest_config & (1 << i))
> + continue;
> + for (j = 0; j < adev->vcn.num_enc_rings; ++j)
> + rings[num_rings++] = &adev->vcn.inst[i].ring_enc[j];
> + }
> + break;
> + case AMDGPU_HW_IP_VCN_JPEG:
> + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
> + if (adev->vcn.harvest_config & (1 << i))
> + continue;
> + rings[num_rings++] = &adev->jpeg.inst[i].ring_dec;
> + }
> + break;
> + }
> +
> + for (i = 0; i < num_rings; ++i) {
> + if (!rings[i]->adev)
> + continue;
> +
> + sched_list[num_scheds++] = &rings[i]->sched;
> + }
> +
> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> + r = drm_sched_entity_init(&ctx->entities[hw_ip][i].entity, ctx->init_priority, sched_list, num_scheds, &ctx->guilty);
> + if (r)
> + goto error_cleanup_entities;
> + }
> +
> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i)
> + ctx->entities[hw_ip][i].sequence = 1;
> +
> + return 0;
> +
> +error_cleanup_entities:
> + while (i--)
> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i].entity);
> +
> + return r;
> +}
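
A pattern worth highlighting in the function above: on a failure at index i,
the unwind must only destroy the 0..i-1 entities that were actually
initialized, never the failed or untouched slots. Reduced to its generic
shape (init_one()/destroy_one() are hypothetical stand-ins for
drm_sched_entity_init()/_destroy(), not driver functions):

struct item { int id; };

/* trivially succeeds here; the real init can fail */
static int init_one(struct item *it) { it->id = 1; return 0; }
static void destroy_one(struct item *it) { it->id = 0; }

static int init_all(struct item *items, unsigned int n)
{
	unsigned int i;
	int r;

	for (i = 0; i < n; ++i) {
		r = init_one(&items[i]);
		if (r)
			goto unwind;
	}
	return 0;

unwind:
	while (i--)	/* visits i-1 .. 0, never the failed slot */
		destroy_one(&items[i]);
	return r;
}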
> +
> static int amdgpu_ctx_init(struct amdgpu_device *adev,
> enum drm_sched_priority priority,
> struct drm_file *filp,
> struct amdgpu_ctx *ctx)
> {
> unsigned num_entities = amdgpu_ctx_total_num_entities();
> - unsigned i, j, k;
> + unsigned i;
> int r;
>
> if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> @@ -103,7 +189,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> for (i = 0; i < num_entities; ++i) {
> struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
>
> - entity->sequence = 1;
> + entity->sequence = -1;
> entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
> }
> for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
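
Side note on the sentinel above: assuming sequence stays the 64-bit unsigned
it is today, storing -1 and later testing "== -1" is well-defined, since
both sides of the comparison convert to UINT64_MAX. A tiny standalone check:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t seq = -1;		/* sentinel, wraps to UINT64_MAX */

	assert(seq == (uint64_t)-1);	/* what the "== -1" test compiles to */
	seq = 1;			/* first real sequence number */
	assert(seq != (uint64_t)-1);
	return 0;
}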
> @@ -120,85 +206,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> ctx->init_priority = priority;
> ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
>
> - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> - struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
> - struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
> - unsigned num_rings = 0;
> - unsigned num_rqs = 0;
> -
> - switch (i) {
> - case AMDGPU_HW_IP_GFX:
> - rings[0] = &adev->gfx.gfx_ring[0];
> - num_rings = 1;
> - break;
> - case AMDGPU_HW_IP_COMPUTE:
> - for (j = 0; j < adev->gfx.num_compute_rings; ++j)
> - rings[j] = &adev->gfx.compute_ring[j];
> - num_rings = adev->gfx.num_compute_rings;
> - break;
> - case AMDGPU_HW_IP_DMA:
> - for (j = 0; j < adev->sdma.num_instances; ++j)
> - rings[j] = &adev->sdma.instance[j].ring;
> - num_rings = adev->sdma.num_instances;
> - break;
> - case AMDGPU_HW_IP_UVD:
> - rings[0] = &adev->uvd.inst[0].ring;
> - num_rings = 1;
> - break;
> - case AMDGPU_HW_IP_VCE:
> - rings[0] = &adev->vce.ring[0];
> - num_rings = 1;
> - break;
> - case AMDGPU_HW_IP_UVD_ENC:
> - rings[0] = &adev->uvd.inst[0].ring_enc[0];
> - num_rings = 1;
> - break;
> - case AMDGPU_HW_IP_VCN_DEC:
> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> - if (adev->vcn.harvest_config & (1 << j))
> - continue;
> - rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
> - }
> - break;
> - case AMDGPU_HW_IP_VCN_ENC:
> - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
> - if (adev->vcn.harvest_config & (1 << j))
> - continue;
> - for (k = 0; k < adev->vcn.num_enc_rings; ++k)
> - rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
> - }
> - break;
> - case AMDGPU_HW_IP_VCN_JPEG:
> - for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
> - if (adev->vcn.harvest_config & (1 << j))
> - continue;
> - rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
> - }
> - break;
> - }
> -
> - for (j = 0; j < num_rings; ++j) {
> - if (!rings[j]->adev)
> - continue;
> -
> - sched_list[num_rqs++] = &rings[j]->sched;
> - }
> -
> - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> - r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> - priority, sched_list,
> - num_rqs, &ctx->guilty);
> - if (r)
> - goto error_cleanup_entities;
> - }
> -
> return 0;
>
> -error_cleanup_entities:
> - for (i = 0; i < num_entities; ++i)
> - drm_sched_entity_destroy(&ctx->entities[0][i].entity);
> - kfree(ctx->entities[0]);
> -
> error_free_fences:
> kfree(ctx->fences);
> ctx->fences = NULL;
> @@ -229,6 +238,8 @@ static void amdgpu_ctx_fini(struct kref *ref)
> int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> u32 ring, struct drm_sched_entity **entity)
> {
> + int r;
> +
> if (hw_ip >= AMDGPU_HW_IP_NUM) {
> DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
> return -EINVAL;
> @@ -245,6 +256,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
> return -EINVAL;
> }
>
> + if (ctx->entities[hw_ip][ring].sequence == -1) {
> + r = amdgpu_ctx_init_entity(ctx, hw_ip);
> +
> + if (r)
> + return r;
> + }
> +
> *entity = &ctx->entities[hw_ip][ring].entity;
> return 0;
> }
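
With that, the first submission against a HW IP pays the one-time setup cost
and every later lookup just returns the pointer. Rough caller's view
(ring_idx is a placeholder index, error handling trimmed):

	struct drm_sched_entity *entity;
	int r;

	/* first call for this HW IP initializes all of its entities;
	 * ring_idx is a placeholder, not a real variable in the driver */
	r = amdgpu_ctx_get_entity(ctx, AMDGPU_HW_IP_COMPUTE, 0, ring_idx,
				  &entity);
	if (r)
		return r;
	/* entity stays valid until the context is torn down */

One more thing to double check before this lands: two concurrent first
submissions on the same context would both see sequence == -1, so the
surrounding locking needs to make that double init harmless.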