[RFC PATCH 2/3] drm/amdgpu: change hw sched list on ctx priority override
Luben Tuikov
luben.tuikov at amd.com
Fri Feb 28 04:00:05 UTC 2020
On 2020-02-26 3:37 p.m., Nirmoy Das wrote:
> We were changing compute ring priority while rings were being used,
> before every job submission, which is not recommended. This patch
> instead recreates the entity with a high/normal priority sched list
> when the user changes the ctx's priority.
>
> The high/normal priority sched lists are generated from the set of
> high/normal priority compute queues. When there are no high priority
> hw queues, it falls back to software priority.
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 58 ++++++++++++++++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ++
> 5 files changed, 59 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f397ff97b4e4..8304d0c87899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> struct drm_sched_entity *entity = p->entity;
> enum drm_sched_priority priority;
> - struct amdgpu_ring *ring;
> struct amdgpu_bo_list_entry *e;
> struct amdgpu_job *job;
> uint64_t seq;
> @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> priority = job->base.s_priority;
> drm_sched_entity_push_job(&job->base, entity);
>
> - ring = to_amdgpu_ring(entity->rq->sched);
> - amdgpu_ring_priority_get(ring, priority);
> -
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 94a6c42f29ea..ea4dc57d2237 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -85,8 +85,13 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
> num_scheds = 1;
> break;
> case AMDGPU_HW_IP_COMPUTE:
> - scheds = adev->gfx.compute_sched;
> - num_scheds = adev->gfx.num_compute_sched;
> + if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> + scheds = adev->gfx.compute_sched;
> + num_scheds = adev->gfx.num_compute_sched;
> + } else {
> + scheds = adev->gfx.compute_sched_high;
> + num_scheds = adev->gfx.num_compute_sched_high;
> + }
Why more if-conditionals?
If you initialize a map of size DRM_SCHED_PRIORITY_MAX, indexed by
priority, then you can simply do:
	scheds = adev->gfx.map[priority];
So, part of your map would point at the normal schedulers and the
rest at the high ones.
Also, I don't think you should introduce yet another
compute_sched[] array. All this should be folded into
a single array containing both normal and high.
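Something like this, as a rough sketch -- the field names here are
hypothetical, not in the patch:

	/* in struct amdgpu_gfx: one sched list per priority level,
	 * indexed directly by drm_sched_priority */
	struct drm_gpu_scheduler *
		compute_prio_sched[DRM_SCHED_PRIORITY_MAX][AMDGPU_MAX_COMPUTE_RINGS];
	uint32_t num_compute_prio_sched[DRM_SCHED_PRIORITY_MAX];

	/* amdgpu_ctx_init_entity() then needs no branch at all: */
	case AMDGPU_HW_IP_COMPUTE:
		scheds = adev->gfx.compute_prio_sched[priority];
		num_scheds = adev->gfx.num_compute_prio_sched[priority];
		break;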
Also consider changing to this:
	enum drm_sched_priority {
		DRM_SCHED_PRIORITY_UNSET,
		DRM_SCHED_PRIORITY_INVALID,	/* <--- remove this one */
		DRM_SCHED_PRIORITY_MIN,
		DRM_SCHED_PRIORITY_LOW = DRM_SCHED_PRIORITY_MIN,
		DRM_SCHED_PRIORITY_NORMAL,
		DRM_SCHED_PRIORITY_HIGH_SW,
		DRM_SCHED_PRIORITY_HIGH_HW,
		DRM_SCHED_PRIORITY_KERNEL,
		DRM_SCHED_PRIORITY_MAX,
	};
We should never have an "invalid priority", just an ignored priority. :-)
Second, having 0 as UNSET gives you an easy default when you set
map[0] = normal_priority, as memory usually comes memset to 0.
IOW, you'd not need to check against UNSET, and could simply use
the default [0], which you'd set to normal priority.
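For instance, still with the hypothetical map above, the init code
could mirror the normal list into every slot it never filled, [0]
(UNSET) included, so lookups need no populated-or-not check:

	for (prio = 0; prio < DRM_SCHED_PRIORITY_MAX; prio++) {
		if (adev->gfx.num_compute_prio_sched[prio])
			continue;
		/* empty slot: reuse the normal-priority list */
		memcpy(adev->gfx.compute_prio_sched[prio],
		       adev->gfx.compute_prio_sched[DRM_SCHED_PRIORITY_NORMAL],
		       sizeof(adev->gfx.compute_prio_sched[prio]));
		adev->gfx.num_compute_prio_sched[prio] =
			adev->gfx.num_compute_prio_sched[DRM_SCHED_PRIORITY_NORMAL];
	}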
> break;
> case AMDGPU_HW_IP_DMA:
> scheds = adev->sdma.sdma_sched;
> @@ -502,6 +507,24 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
> return fence;
> }
>
> +static void amdgpu_ctx_hw_priority_override(struct amdgpu_ctx *ctx,
> + const u32 hw_ip,
> + enum drm_sched_priority priority)
> +{
> + int i;
> +
> + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> + if (!ctx->entities[hw_ip][i])
> + continue;
> +
> + /* TODO what happens with prev scheduled jobs */
> + drm_sched_entity_destroy(&ctx->entities[hw_ip][i]->entity);
> + amdgpu_ctx_fini_entity(ctx->entities[hw_ip][i]);
> +
> + amdgpu_ctx_init_entity(ctx, AMDGPU_HW_IP_COMPUTE, i);
> +
> + }
> +}
> void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> enum drm_sched_priority priority)
> {
> @@ -515,12 +538,18 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
> for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> struct drm_sched_entity *entity;
> + struct amdgpu_ring *ring;
>
> if (!ctx->entities[i][j])
> continue;
>
> entity = &ctx->entities[i][j]->entity;
> - drm_sched_entity_set_priority(entity, ctx_prio);
> + ring = to_amdgpu_ring(entity->rq->sched);
> +
> + if (ring->funcs->init_priority)
> + amdgpu_ctx_hw_priority_override(ctx, i, priority);
> + else
> + drm_sched_entity_set_priority(entity, ctx_prio);
Why more if-conditionals?
Could we not have a map?
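A sketch, again with the hypothetical map, plus a hypothetical
helper that re-points an entity's sched list in place (instead of
the destroy/recreate in amdgpu_ctx_hw_priority_override() above):

	scheds = adev->gfx.compute_prio_sched[ctx_prio];
	num_scheds = adev->gfx.num_compute_prio_sched[ctx_prio];
	/* swap_sched_list() does not exist today; it would move the
	 * entity onto the new list rather than recreating it */
	swap_sched_list(entity, scheds, num_scheds);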
> }
> }
> }
> @@ -630,6 +659,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>
> void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
> {
> + enum drm_sched_priority priority;
> int i, j;
>
> for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
> @@ -638,8 +668,26 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
> }
>
> for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> - adev->gfx.num_compute_sched++;
> + priority = adev->gfx.compute_ring[i].priority;
> + if (priority <= DRM_SCHED_PRIORITY_NORMAL) {
> + adev->gfx.compute_sched[i] =
> + &adev->gfx.compute_ring[i].sched;
> + adev->gfx.num_compute_sched++;
> + } else {
> + adev->gfx.compute_sched_high[i] =
> + &adev->gfx.compute_ring[i].sched;
> + adev->gfx.num_compute_sched_high++;
> + }
> + }
I think it would be better to use a map for this
as opposed to an if-conditional.
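With the map sketched above, the whole loop reduces to binning each
ring's sched by its priority, with no if/else:

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		enum drm_sched_priority prio =
			adev->gfx.compute_ring[i].priority;
		uint32_t j = adev->gfx.num_compute_prio_sched[prio]++;

		adev->gfx.compute_prio_sched[prio][j] =
			&adev->gfx.compute_ring[i].sched;
	}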
> +
> + /* if there are no high prio compute queue then mirror with normal
> + * priority so amdgpu_ctx_init_entity() works as expected */
> + if (!adev->gfx.num_compute_sched_high) {
> + for (i = 0; i < adev->gfx.num_compute_sched; i++) {
> + adev->gfx.compute_sched_high[i] =
> + adev->gfx.compute_sched[i];
> + }
> + adev->gfx.num_compute_sched_high = adev->gfx.num_compute_sched;
> }
>
> for (i = 0; i < adev->sdma.num_instances; i++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index ca17ffb01301..d58d748e3a56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -279,7 +279,9 @@ struct amdgpu_gfx {
> unsigned num_gfx_rings;
> struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> + struct drm_gpu_scheduler *compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
> uint32_t num_compute_sched;
> + uint32_t num_compute_sched_high;
> unsigned num_compute_rings;
> struct amdgpu_irq_src eop_irq;
> struct amdgpu_irq_src priv_reg_irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index d42be880a236..4981e443a884 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>
> static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> {
> - struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
> struct amdgpu_job *job = to_amdgpu_job(s_job);
>
> drm_sched_job_cleanup(s_job);
>
> - amdgpu_ring_priority_put(ring, s_job->s_priority);
> dma_fence_put(job->fence);
> amdgpu_sync_free(&job->sync);
> amdgpu_sync_free(&job->sched_sync);
> @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> void *owner, struct dma_fence **f)
> {
> enum drm_sched_priority priority;
> - struct amdgpu_ring *ring;
> int r;
>
> if (!f)
> @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> priority = job->base.s_priority;
> drm_sched_entity_push_job(&job->base, entity);
>
> - ring = to_amdgpu_ring(entity->rq->sched);
> - amdgpu_ring_priority_get(ring, priority);
> -
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 18e11b0fdc3e..4501ae7afb2e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -326,6 +326,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>
> ring->max_dw = max_dw;
> ring->priority = DRM_SCHED_PRIORITY_NORMAL;
> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
> + ring->funcs->init_priority)
> + ring->funcs->init_priority(ring);
> +
Why not add "init_priority" to all the ring funcs and just call it here unconditionally?
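One way, as a sketch: give every ring type an implementation, even
if it's just a shared no-op for rings without hardware priorities
(amdgpu_ring_init_priority_nop() is a made-up name):

	static void amdgpu_ring_init_priority_nop(struct amdgpu_ring *ring)
	{
	}

	/* with each amdgpu_ring_funcs setting .init_priority, this
	 * call is always safe: */
	ring->funcs->init_priority(ring);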
Regards,
Luben
> mutex_init(&ring->priority_mutex);
>
> for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
>