[PATCH] drm/amdgpu: use the last IB as gang leader v2

Timur Kristóf timur.kristof at gmail.com
Tue Nov 15 14:02:11 UTC 2022


I can confirm this patch solves an issue with gang submit: it removes
the need for user space to sort IBs by IP type, since the gang leader
is now determined by the last IB specified, as intended.
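
For context, here is a rough user-space sketch of the kind of gang
submission this enables, using the chunk-based CS UAPI from
amdgpu_drm.h. The IB addresses and sizes are placeholders and all of
the surrounding ioctl plumbing (context, BO list, fence chunk, error
handling) is left out, so treat it as an illustration rather than a
working submission path:

#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>   /* header path depends on your setup */

/*
 * Two IBs submitted as one gang: a compute IB first and a GFX IB last.
 * With the patch applied, the GFX job leads the gang simply because it
 * owns the last IB specified, regardless of the order in which the
 * kernel allocates the jobs internally.
 */
static void fill_gang_chunks(struct drm_amdgpu_cs_chunk chunks[2],
                             struct drm_amdgpu_cs_chunk_ib ibs[2],
                             __u64 compute_va, __u32 compute_bytes,
                             __u64 gfx_va, __u32 gfx_bytes)
{
        memset(ibs, 0, sizeof(ibs[0]) * 2);

        ibs[0].ip_type  = AMDGPU_HW_IP_COMPUTE;
        ibs[0].va_start = compute_va;
        ibs[0].ib_bytes = compute_bytes;

        /* Last IB: this one now determines the gang leader. */
        ibs[1].ip_type  = AMDGPU_HW_IP_GFX;
        ibs[1].va_start = gfx_va;
        ibs[1].ib_bytes = gfx_bytes;

        for (int i = 0; i < 2; i++) {
                chunks[i].chunk_id   = AMDGPU_CHUNK_ID_IB;
                chunks[i].length_dw  = sizeof(ibs[i]) / 4;
                chunks[i].chunk_data = (__u64)(uintptr_t)&ibs[i];
        }
}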

Tested-by: Timur Kristóf <timur.kristof at gmail.com>
Acked-by: Timur Kristóf <timur.kristof at gmail.com>
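
For anyone skimming the thread, my reading of the fix below, reduced
to a hand-written sketch (not the actual driver code): the parser now
remembers which job received the last IB chunk and treats that job as
the gang leader, and the loops over the gang skip the leader by index
check instead of assuming it sits in the last array slot.

/* Hand-written model of the idea, not the real amdgpu code. */
struct gang_model {
        unsigned int gang_size;       /* number of jobs in the gang    */
        unsigned int gang_leader_idx; /* job that received the last IB */
};

/*
 * Called once per IB chunk; job_idx is the job the IB was attached to.
 * The last call wins, so the leader follows the last IB specified by
 * user space rather than the last job that happened to be allocated.
 */
static void model_note_ib(struct gang_model *g, unsigned int job_idx)
{
        g->gang_leader_idx = job_idx;
}

/* Gang loops skip the leader by index instead of "i < gang_size - 1". */
static void model_for_each_follower(struct gang_model *g,
                                    void (*fn)(unsigned int idx))
{
        for (unsigned int i = 0; i < g->gang_size; ++i) {
                if (i == g->gang_leader_idx)
                        continue;
                fn(i);
        }
}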

On Tue, 2022-11-15 at 10:43 +0100, Christian König wrote:
> Am 15.11.22 um 10:42 schrieb Christian König:
> > It turned out that the gang leader is not the last IB specified,
> > but instead the last job allocated.
> > 
> > This is a bit unfortunate and not very intuitive for the CS
> > interface, so try to fix this.
> 
> Alex, could you take a look at this? I would really like to get this
> into the next -rc.
> 
> Thanks,
> Christian.
> 
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ++++++++++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |  1 +
> >   2 files changed, 17 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1bbd39b3b0fc..fbdf139cf497 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
> >                 return r;
> >   
> >         ++(num_ibs[r]);
> > +       p->gang_leader_idx = r;
> >         return 0;
> >   }
> >   
> > @@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >                 if (ret)
> >                         goto free_all_kdata;
> >         }
> > -       p->gang_leader = p->jobs[p->gang_size - 1];
> > +       p->gang_leader = p->jobs[p->gang_leader_idx];
> >   
> >         if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
> >                 ret = -ECANCELED;
> > @@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> >                         return r;
> >         }
> >   
> > -       for (i = 0; i < p->gang_size - 1; ++i) {
> > +       for (i = 0; i < p->gang_size; ++i) {
> > +               if (p->jobs[i] == leader)
> > +                       continue;
> > +
> >                 r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
> >                 if (r)
> >                         return r;
> >         }
> >   
> > -       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
> > +       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
> >         if (r && r != -ERESTARTSYS)
> >                 DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
> > -
> >         return r;
> >   }
> >   
> > @@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         for (i = 0; i < p->gang_size; ++i)
> >                 drm_sched_job_arm(&p->jobs[i]->base);
> >   
> > -       for (i = 0; i < (p->gang_size - 1); ++i) {
> > +       for (i = 0; i < p->gang_size; ++i) {
> >                 struct dma_fence *fence;
> >   
> > +               if (p->jobs[i] == leader)
> > +                       continue;
> > +
> >                 fence = &p->jobs[i]->base.s_fence->scheduled;
> >                 r = amdgpu_sync_fence(&leader->sync, fence);
> >                 if (r)
> > @@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         list_for_each_entry(e, &p->validated, tv.head) {
> >   
> >                 /* Everybody except for the gang leader uses READ */
> > -               for (i = 0; i < (p->gang_size - 1); ++i) {
> > +               for (i = 0; i < p->gang_size; ++i) {
> > +                       if (p->jobs[i] == leader)
> > +                               continue;
> > +
> >                         dma_resv_add_fence(e->tv.bo->base.resv,
> >                                            &p->jobs[i]->base.s_fence->finished,
> >                                            DMA_RESV_USAGE_READ);
> > @@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >                 e->tv.num_shared = 0;
> >         }
> >   
> > -       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
> > +       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
> >                                    p->fence);
> >         amdgpu_cs_post_dependencies(p);
> >   
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > index cbaa19b2b8a3..f80adf9069ec 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
> > @@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
> >   
> >         /* scheduler job objects */
> >         unsigned int            gang_size;
> > +       unsigned int            gang_leader_idx;
> >         struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
> >         struct amdgpu_job       *jobs[AMDGPU_CS_GANG_SIZE];
> >         struct amdgpu_job       *gang_leader;
> 


