[PATCH v2] drm/amdgpu: Get rid of dep_sync as a seperate object.

Fri Dec 1 20:29:00 UTC 2017

On 12/01/2017 02:59 PM, Christian König wrote:
> Am 01.12.2017 um 20:39 schrieb Andrey Grodzovsky:
>> Instead mark fence as explicit in it's amdgpu_sync_entry.
>>
>> v2:
>> Fix use after free bug and add new parameter description.
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 14 +++++++-------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 26 
>> ++++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 23 +++++++++++++++++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  6 +++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 16 ++++++++--------
>>   7 files changed, 48 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index f8657c3..c56a986 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1121,7 +1121,6 @@ struct amdgpu_job {
>>       struct amdgpu_vm    *vm;
>>       struct amdgpu_ring    *ring;
>>       struct amdgpu_sync    sync;
>> -    struct amdgpu_sync    dep_sync;
>>       struct amdgpu_sync    sched_sync;
>>       struct amdgpu_ib    *ibs;
>>       struct dma_fence    *fence; /* the hw fence */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d15836b..b694d35 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -788,7 +788,7 @@ static int amdgpu_bo_vm_update_pte(struct 
>> amdgpu_cs_parser *p)
>>           return r;
>>         r = amdgpu_sync_fence(adev, &p->job->sync,
>> -                  fpriv->prt_va->last_pt_update);
>> +                  fpriv->prt_va->last_pt_update, false);
>>       if (r)
>>           return r;
>>   @@ -802,7 +802,7 @@ static int amdgpu_bo_vm_update_pte(struct 
>> amdgpu_cs_parser *p)
>>               return r;
>>             f = bo_va->last_pt_update;
>> -        r = amdgpu_sync_fence(adev, &p->job->sync, f);
>> +        r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
>>           if (r)
>>               return r;
>>       }
>> @@ -825,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct 
>> amdgpu_cs_parser *p)
>>                   return r;
>>                 f = bo_va->last_pt_update;
>> -            r = amdgpu_sync_fence(adev, &p->job->sync, f);
>> +            r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
>>               if (r)
>>                   return r;
>>           }
>> @@ -836,7 +836,7 @@ static int amdgpu_bo_vm_update_pte(struct 
>> amdgpu_cs_parser *p)
>>       if (r)
>>           return r;
>>   -    r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
>> +    r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
>>       if (r)
>>           return r;
>>   @@ -1040,8 +1040,8 @@ static int amdgpu_cs_process_fence_dep(struct 
>> amdgpu_cs_parser *p,
>>               amdgpu_ctx_put(ctx);
>>               return r;
>>           } else if (fence) {
>> -            r = amdgpu_sync_fence(p->adev, &p->job->dep_sync,
>> -                          fence);
>> +            r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
>> +                    true);
>>               dma_fence_put(fence);
>>               amdgpu_ctx_put(ctx);
>>               if (r)
>> @@ -1060,7 +1060,7 @@ static int 
>> amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>       if (r)
>>           return r;
>>   -    r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence);
>> +    r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
>>       dma_fence_put(fence);
>>         return r;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 659997b..0cf86eb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
>> unsigned num_ibs,
>>       }
>>         if (ring->funcs->emit_pipeline_sync && job &&
>> -        ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
>> +        ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
>>            amdgpu_vm_need_pipeline_sync(ring, job))) {
>>           need_pipe_sync = true;
>>           dma_fence_put(tmp);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index 18770a8..61fb934 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -60,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, 
>> unsigned num_ibs,
>>       (*job)->num_ibs = num_ibs;
>>         amdgpu_sync_create(&(*job)->sync);
>> -    amdgpu_sync_create(&(*job)->dep_sync);
>>       amdgpu_sync_create(&(*job)->sched_sync);
>>       (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>>   @@ -104,7 +103,6 @@ static void amdgpu_job_free_cb(struct 
>> amd_sched_job *s_job)
>>       amdgpu_ring_priority_put(job->ring, s_job->s_priority);
>>       dma_fence_put(job->fence);
>>       amdgpu_sync_free(&job->sync);
>> -    amdgpu_sync_free(&job->dep_sync);
>>       amdgpu_sync_free(&job->sched_sync);
>>       kfree(job);
>>   }
>> @@ -115,7 +113,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
>>         dma_fence_put(job->fence);
>>       amdgpu_sync_free(&job->sync);
>> -    amdgpu_sync_free(&job->dep_sync);
>>       amdgpu_sync_free(&job->sched_sync);
>>       kfree(job);
>>   }
>> @@ -149,17 +146,18 @@ static struct dma_fence 
>> *amdgpu_job_dependency(struct amd_sched_job *sched_job,
>>   {
>>       struct amdgpu_job *job = to_amdgpu_job(sched_job);
>>       struct amdgpu_vm *vm = job->vm;
>> -
>> -    struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
>> +    bool explicit = false;
>>       int r;
>> -
>> -    if (amd_sched_dependency_optimized(fence, s_entity)) {
>> -        r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
>> -        if (r)
>> -            DRM_ERROR("Error adding fence to sync (%d)\n", r);
>> +    struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, 
>> &explicit);
>> +
>> +    if (fence && explicit) {
>> +        if (amd_sched_dependency_optimized(fence, s_entity)) {
>> +            r = amdgpu_sync_fence(job->adev, &job->sched_sync, 
>> fence, false);
>> +            if (r)
>> +                DRM_ERROR("Error adding fence to sync (%d)\n", r);
>> +        }
>>       }
>> -    if (!fence)
>> -        fence = amdgpu_sync_get_fence(&job->sync);
>> +
>>       while (fence == NULL && vm && !job->vm_id) {
>>           struct amdgpu_ring *ring = job->ring;
>>   @@ -169,7 +167,7 @@ static struct dma_fence 
>> *amdgpu_job_dependency(struct amd_sched_job *sched_job,
>>           if (r)
>>               DRM_ERROR("Error getting VM ID (%d)\n", r);
>>   -        fence = amdgpu_sync_get_fence(&job->sync);
>> +        fence = amdgpu_sync_get_fence(&job->sync, NULL);
>>       }
>>         return fence;
>> @@ -190,7 +188,7 @@ static struct dma_fence *amdgpu_job_run(struct 
>> amd_sched_job *sched_job)
>>       finished = &job->base.s_fence->finished;
>>       adev = job->adev;
>>   -    BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
>> +    BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL, NULL));
>>         trace_amdgpu_sched_run_job(job);
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> index a4bf21f..6c3dda1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> @@ -35,6 +35,7 @@
>>   struct amdgpu_sync_entry {
>>       struct hlist_node    node;
>>       struct dma_fence    *fence;
>> +    bool    explicit;
>>   };
>>     static struct kmem_cache *amdgpu_sync_slab;
>> @@ -141,7 +142,7 @@ static bool amdgpu_sync_add_later(struct 
>> amdgpu_sync *sync, struct dma_fence *f)
>>    *
>>    */
>>   int amdgpu_sync_fence(struct amdgpu_device *adev, struct 
>> amdgpu_sync *sync,
>> -              struct dma_fence *f)
>> +              struct dma_fence *f, bool explicit)
>>   {
>>       struct amdgpu_sync_entry *e;
>>   @@ -159,6 +160,8 @@ int amdgpu_sync_fence(struct amdgpu_device 
>> *adev, struct amdgpu_sync *sync,
>>       if (!e)
>>           return -ENOMEM;
>>   +    e->explicit = explicit;
>> +
>>       hash_add(sync->fences, &e->node, f->context);
>>       e->fence = dma_fence_get(f);
>>       return 0;
>> @@ -189,7 +192,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>>         /* always sync to the exclusive fence */
>>       f = reservation_object_get_excl(resv);
>> -    r = amdgpu_sync_fence(adev, sync, f);
>> +    r = amdgpu_sync_fence(adev, sync, f, false);
>>         if (explicit_sync)
>>           return r;
>> @@ -220,7 +223,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>>                   continue;
>>           }
>>   -        r = amdgpu_sync_fence(adev, sync, f);
>> +        r = amdgpu_sync_fence(adev, sync, f, false);
>>           if (r)
>>               break;
>>       }
>> @@ -232,12 +235,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>>    *
>>    * @sync: the sync object
>>    * @ring: optional ring to use for test
>> + * @explicit: true if the next fence is explicit
>>    *
>>    * Returns the next fence not signaled yet without removing it from 
>> the sync
>>    * object.
>>    */
>>   struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>> -                     struct amdgpu_ring *ring)
>> +                     struct amdgpu_ring *ring,
>> +                     bool *explicit)
>>   {
>>       struct amdgpu_sync_entry *e;
>>       struct hlist_node *tmp;
>> @@ -247,6 +252,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct 
>> amdgpu_sync *sync,
>>           struct dma_fence *f = e->fence;
>>           struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
>>   +
>> +        if (explicit)
>> +            *explicit = e->explicit;
>> +
>
> We don't need this for _peek_fence, don't we?

Currently not, I assumed there might be some future use and also to 
make  amdgpu_sync_peek_fence and
amdgpu_sync_get_fence interfaces look the same. But i guess if there is 
no use then no need for extra parameter.
Already removed and pushed the change.

Thanks,
Andrey

>
> Apart from that the patch is Reviewed-by: Christian König 
> <christian.koenig at amd.com>.
>
> Now the next step for you is to think a bit about how we can further 
> optimize this.
>
> For example we don't need to keep the implicit fence around any more 
> when we add an explicit one which is newer.
>
> Regards,
> Christian.
>
>>           if (dma_fence_is_signaled(f)) {
>>               hash_del(&e->node);
>>               dma_fence_put(f);
>> @@ -275,19 +284,21 @@ struct dma_fence *amdgpu_sync_peek_fence(struct 
>> amdgpu_sync *sync,
>>    * amdgpu_sync_get_fence - get the next fence from the sync object
>>    *
>>    * @sync: sync object to use
>> + * @explicit: true if the next fence is explicit
>>    *
>>    * Get and removes the next fence from the sync object not signaled 
>> yet.
>>    */
>> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
>> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, 
>> bool *explicit)
>>   {
>>       struct amdgpu_sync_entry *e;
>>       struct hlist_node *tmp;
>>       struct dma_fence *f;
>>       int i;
>> -
>>       hash_for_each_safe(sync->fences, i, tmp, e, node) {
>>             f = e->fence;
>> +        if (explicit)
>> +            *explicit = e->explicit;
>>             hash_del(&e->node);
>>           kmem_cache_free(amdgpu_sync_slab, e);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
>> index 70d7e3a..82c614d8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
>> @@ -41,15 +41,15 @@ struct amdgpu_sync {
>>     void amdgpu_sync_create(struct amdgpu_sync *sync);
>>   int amdgpu_sync_fence(struct amdgpu_device *adev, struct 
>> amdgpu_sync *sync,
>> -              struct dma_fence *f);
>> +              struct dma_fence *f, bool explicit);
>>   int amdgpu_sync_resv(struct amdgpu_device *adev,
>>                struct amdgpu_sync *sync,
>>                struct reservation_object *resv,
>>                void *owner,
>>                bool explicit_sync);
>>   struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>> -                     struct amdgpu_ring *ring);
>> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
>> +                     struct amdgpu_ring *ring, bool *explicit);
>> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, 
>> bool *explicit);
>>   int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
>>   void amdgpu_sync_free(struct amdgpu_sync *sync);
>>   int amdgpu_sync_init(void);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7de519b..6a332f3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -486,9 +486,9 @@ static int 
>> amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
>>           needs_flush = true;
>>           /* to prevent one context starved by another context */
>>           id->pd_gpu_addr = 0;
>> -        tmp = amdgpu_sync_peek_fence(&id->active, ring);
>> +        tmp = amdgpu_sync_peek_fence(&id->active, ring, NULL);
>>           if (tmp) {
>> -            r = amdgpu_sync_fence(adev, sync, tmp);
>> +            r = amdgpu_sync_fence(adev, sync, tmp, false);
>>               return r;
>>           }
>>       }
>> @@ -496,7 +496,7 @@ static int 
>> amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
>>       /* Good we can use this VMID. Remember this submission as
>>       * user of the VMID.
>>       */
>> -    r = amdgpu_sync_fence(ring->adev, &id->active, fence);
>> +    r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>>       if (r)
>>           goto out;
>>   @@ -556,7 +556,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>       /* Check if we have an idle VMID */
>>       i = 0;
>>       list_for_each_entry(idle, &id_mgr->ids_lru, list) {
>> -        fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
>> +        fences[i] = amdgpu_sync_peek_fence(&idle->active, ring, NULL);
>>           if (!fences[i])
>>               break;
>>           ++i;
>> @@ -583,7 +583,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>           }
>>     -        r = amdgpu_sync_fence(ring->adev, sync, &array->base);
>> +        r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
>>           dma_fence_put(&array->base);
>>           if (r)
>>               goto error;
>> @@ -626,7 +626,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>           /* Good we can use this VMID. Remember this submission as
>>            * user of the VMID.
>>            */
>> -        r = amdgpu_sync_fence(ring->adev, &id->active, fence);
>> +        r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>>           if (r)
>>               goto error;
>>   @@ -646,7 +646,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, 
>> struct amdgpu_ring *ring,
>>       id = idle;
>>         /* Remember this submission as user of the VMID */
>> -    r = amdgpu_sync_fence(ring->adev, &id->active, fence);
>> +    r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>>       if (r)
>>           goto error;
>>   @@ -1657,7 +1657,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>> amdgpu_device *adev,
>>           addr = 0;
>>       }
>>   -    r = amdgpu_sync_fence(adev, &job->sync, exclusive);
>> +    r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
>>       if (r)
>>           goto error_free;
>