[RFC PATCH 2/2] drm/amdgpu: disable gpu load balancer for vcn jobs
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Mar 11 18:03:00 UTC 2020
Am 11.03.20 um 18:18 schrieb Nirmoy Das:
> VCN HW doesn't support dynamic load balance on multiple
> instances for a context. This modifies the entity's sched_list
> to a sched_list consisting of only one drm scheduler.
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 +
> 4 files changed, 29 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 8304d0c87899..00032093d8a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1257,6 +1257,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> priority = job->base.s_priority;
> drm_sched_entity_push_job(&job->base, entity);
>
> + amdgpu_ctx_limit_load_balance(entity);
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index fa575bdc03c8..57b49188306d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -139,6 +139,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
> if (r)
> goto error_free_entity;
>
> + entity->hw_ip = hw_ip;
> ctx->entities[hw_ip][ring] = entity;
> return 0;
>
> @@ -559,6 +560,30 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> }
> }
>
> +static void limit_vcn_load_balance(struct amdgpu_ctx_entity *centity)
> +{
> + struct drm_gpu_scheduler **scheds = &centity->entity.rq->sched;
> +
> + if (drm_sched_entity_num_jobs(&centity->entity) == 1)
That check doesn't work correctly: the job might already have been
processed by the time we get here.
> + drm_sched_entity_modify_sched(&centity->entity, scheds, 1);
Just always update the scheduler here.
> +
> +}
> +
> +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity)
> +{
> + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
> +
> + if (!centity)
> + return;
That check looks superfluous to me.
> +
> + switch (centity->hw_ip) {
Better get the ring from entity->rq->sched instead.
> + case AMDGPU_HW_IP_VCN_DEC:
> + case AMDGPU_HW_IP_VCN_ENC:
Maybe better to make that a flag in the ring functions, but this way
works as well.
Regards,
Christian.
> + limit_vcn_load_balance(centity);
> + }
> +
> +}
> +
> int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
> struct drm_sched_entity *entity)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index de490f183af2..d52d8d562d77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -33,6 +33,7 @@ struct amdgpu_fpriv;
>
> struct amdgpu_ctx_entity {
> uint64_t sequence;
> + uint32_t hw_ip;
> struct drm_sched_entity entity;
> struct dma_fence *fences[];
> };
> @@ -90,5 +91,6 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>
> void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
>
> +void amdgpu_ctx_limit_load_balance(struct drm_sched_entity *entity);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 4981e443a884..955d12bc89ae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -154,6 +154,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> amdgpu_job_free_resources(job);
> priority = job->base.s_priority;
> drm_sched_entity_push_job(&job->base, entity);
> + amdgpu_ctx_limit_load_balance(entity);
>
> return 0;
> }
More information about the amd-gfx
mailing list