[PATCH] drm/amdgpu: use the last IB as gang leader v2

Christian König ckoenig.leichtzumerken at gmail.com
Tue Nov 15 14:11:51 UTC 2022


I wanted to push this directly to drm-misc-fixes.

But I'm going to add a Fixes tag in case someone tries to backport the
gang submit changes.

Regards,
Christian.

On 15.11.22 at 15:10, Alex Deucher wrote:
> Would be good to add a Fixes tag.
>
> Alex
>
> On Tue, Nov 15, 2022 at 9:09 AM Alex Deucher <alexdeucher at gmail.com> wrote:
>> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>>
>> On Tue, Nov 15, 2022 at 4:44 AM Christian König
>> <christian.koenig at amd.com> wrote:
>>> On 15.11.22 at 10:42, Christian König wrote:
>>>> It turned out that the gang leader is not the last IB specified,
>>>> but instead the last job allocated.
>>>>
>>>> This is a bit unfortunate and not very intuitive for the CS
>>>> interface, so fix this.
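
For context: job slots in p->jobs[] are handed out in first-use order of
the entities, so the slot belonging to the last IB chunk is not
necessarily slot gang_size - 1. A minimal stand-alone sketch of that
slot-assignment logic (simplified stand-in types and names, not the real
amdgpu code):

    #include <stdio.h>

    #define GANG_SIZE 4

    /* Stand-in for the per-entity job slot lookup: return the existing
     * slot for an entity, or hand out a new one in allocation order.
     */
    static unsigned int job_idx(int *slot_entity, unsigned int *gang_size,
                                int entity)
    {
        unsigned int i;

        for (i = 0; i < *gang_size; ++i)
            if (slot_entity[i] == entity)
                return i;

        slot_entity[*gang_size] = entity;
        return (*gang_size)++;
    }

    int main(void)
    {
        /* Three IB chunks: the last IB reuses the entity seen first. */
        int ibs[] = { 0, 1, 0 };
        int slot_entity[GANG_SIZE];
        unsigned int gang_size = 0, gang_leader_idx = 0;
        unsigned int i;

        for (i = 0; i < sizeof(ibs) / sizeof(ibs[0]); ++i)
            gang_leader_idx = job_idx(slot_entity, &gang_size, ibs[i]);

        /* Prints "last allocated slot: 1, last IB's slot: 0": the old
         * code picked slot gang_size - 1 as the leader, the fix records
         * the slot of the last IB instead.
         */
        printf("last allocated slot: %u, last IB's slot: %u\n",
               gang_size - 1, gang_leader_idx);
        return 0;
    }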
>>> Alex, could you take a look at this? I would really like to get this into
>>> the next -rc.
>>>
>>> Thanks,
>>> Christian.
>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ++++++++++++++++-------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |  1 +
>>>>    2 files changed, 17 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index 1bbd39b3b0fc..fbdf139cf497 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
>>>>                return r;
>>>>
>>>>        ++(num_ibs[r]);
>>>> +     p->gang_leader_idx = r;
>>>>        return 0;
>>>>    }
>>>>
>>>> @@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
>>>>                if (ret)
>>>>                        goto free_all_kdata;
>>>>        }
>>>> -     p->gang_leader = p->jobs[p->gang_size - 1];
>>>> +     p->gang_leader = p->jobs[p->gang_leader_idx];
>>>>
>>>>        if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
>>>>                ret = -ECANCELED;
>>>> @@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>>>>                        return r;
>>>>        }
>>>>
>>>> -     for (i = 0; i < p->gang_size - 1; ++i) {
>>>> +     for (i = 0; i < p->gang_size; ++i) {
>>>> +             if (p->jobs[i] == leader)
>>>> +                     continue;
>>>> +
>>>>                r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
>>>>                if (r)
>>>>                        return r;
>>>>        }
>>>>
>>>> -     r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
>>>> +     r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
>>>>        if (r && r != -ERESTARTSYS)
>>>>                DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
>>>> -
>>>>        return r;
>>>>    }
>>>>
>>>> @@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>>        for (i = 0; i < p->gang_size; ++i)
>>>>                drm_sched_job_arm(&p->jobs[i]->base);
>>>>
>>>> -     for (i = 0; i < (p->gang_size - 1); ++i) {
>>>> +     for (i = 0; i < p->gang_size; ++i) {
>>>>                struct dma_fence *fence;
>>>>
>>>> +             if (p->jobs[i] == leader)
>>>> +                     continue;
>>>> +
>>>>                fence = &p->jobs[i]->base.s_fence->scheduled;
>>>>                r = amdgpu_sync_fence(&leader->sync, fence);
>>>>                if (r)
>>>> @@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>>        list_for_each_entry(e, &p->validated, tv.head) {
>>>>
>>>>                /* Everybody except for the gang leader uses READ */
>>>> -             for (i = 0; i < (p->gang_size - 1); ++i) {
>>>> +             for (i = 0; i < p->gang_size; ++i) {
>>>> +                     if (p->jobs[i] == leader)
>>>> +                             continue;
>>>> +
>>>>                        dma_resv_add_fence(e->tv.bo->base.resv,
>>>>                                           &p->jobs[i]->base.s_fence->finished,
>>>>                                           DMA_RESV_USAGE_READ);
>>>> @@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>>                e->tv.num_shared = 0;
>>>>        }
>>>>
>>>> -     seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
>>>> +     seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
>>>>                                   p->fence);
>>>>        amdgpu_cs_post_dependencies(p);
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>>>> index cbaa19b2b8a3..f80adf9069ec 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>>>> @@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
>>>>
>>>>        /* scheduler job objects */
>>>>        unsigned int            gang_size;
>>>> +     unsigned int            gang_leader_idx;
>>>>        struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
>>>>        struct amdgpu_job       *jobs[AMDGPU_CS_GANG_SIZE];
>>>>        struct amdgpu_job       *gang_leader;
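
All three amended loops follow the same pattern: walk the whole gang and
skip the leader by pointer comparison, since the leader may now sit in
any slot rather than always in the last one. A toy stand-alone version
of that pattern (simplified stand-in types, not the real amdgpu
structures):

    #include <stdio.h>

    struct job { const char *name; };

    int main(void)
    {
        struct job gfx = { "gfx" }, compute = { "compute" }, sdma = { "sdma" };
        struct job *jobs[] = { &gfx, &compute, &sdma };
        struct job *leader = &gfx;  /* no longer assumed to be jobs[2] */
        unsigned int i;

        for (i = 0; i < sizeof(jobs) / sizeof(jobs[0]); ++i) {
            if (jobs[i] == leader)
                continue;  /* the leader is wired up separately */
            printf("sync gang member %s to the leader\n", jobs[i]->name);
        }
        return 0;
    }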


