[PATCH] drm/amdgpu: Get rid of amdgpu_job->external_hw_fence

Andrey Grodzovsky andrey.grodzovsky at amd.com
Wed Jul 13 19:32:19 UTC 2022


On 2022-07-13 13:33, Christian König wrote:
> Am 13.07.22 um 19:13 schrieb Andrey Grodzovsky:
>> This is a follow-up cleanup to [1]. See bellow refcount balancing
>> for calling amdgpu_job_submit_direct after this cleanup as far
>> as I calculated.
>>
>> amdgpu_fence_emit
>>     dma_fence_init 1
>>     dma_fence_get(fence) 2
>>     rcu_assign_pointer(*ptr, dma_fence_get(fence) 3
>>
>> ---> amdgpu_job_submit_direct completes before fence signaled
>>             amdgpu_sa_bo_free
>>                 (*sa_bo)->fence = dma_fence_get(fence) 4
>>
>>             amdgpu_job_free
>>                 dma_fence_put 3
>>
>>             amdgpu_vcn_enc_get_destroy_msg
>>                 *fence = dma_fence_get(f) 4
>>                 dma_fence_put(f); 3
>>
>>             amdgpu_vcn_enc_ring_test_ib
>>                 dma_fence_put(fence) 2
>>
>>             amdgpu_fence_process
>>                 dma_fence_put 1
>>
>>             amdgpu_sa_bo_remove_locked
>>                 dma_fence_put 0
>>
>> ---> amdgpu_job_submit_direct completes after fence signaled
>>             amdgpu_fence_process
>>                 dma_fence_put 2
>>
>>             amdgpu_job_free
>>                 dma_fence_put 1
>>
>>             amdgpu_vcn_enc_get_destroy_msg
>>                 *fence = dma_fence_get(f) 2
>>                 dma_fence_put(f); 1
>>
>>             amdgpu_vcn_enc_ring_test_ib
>>                 dma_fence_put(fence) 0
>>
>> [1] - 
>> https://patchwork.kernel.org/project/dri-devel/cover/20220624180955.485440-1-andrey.grodzovsky@amd.com/
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> Suggested-by: Christian König <christian.koenig at amd.com>
>
> Of hand that looks correct to me, but could be that I'm missing 
> something as well.
>
> Anyway I think I can give an Reviewed-by: Christian König 
> <christian.koenig at amd.com> for this.
>
> Thanks,
> Christian.


Pushed, thanks.

Andrey


>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 27 ++++------------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h    |  1 -
>>   3 files changed, 6 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 16faea7ed1cd..b79ee4ffb879 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -5229,8 +5229,7 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>        *
>>        * job->base holds a reference to parent fence
>>        */
>> -    if (job && (job->hw_fence.ops != NULL) &&
>> -        dma_fence_is_signaled(&job->hw_fence)) {
>> +    if (job && dma_fence_is_signaled(&job->hw_fence)) {
>>           job_signaled = true;
>>           dev_info(adev->dev, "Guilty job already signaled, skipping 
>> HW reset");
>>           goto skip_hw_reset;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index 6fa381ee5fa0..10fdd12cf853 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -134,16 +134,10 @@ void amdgpu_job_free_resources(struct 
>> amdgpu_job *job)
>>   {
>>       struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
>>       struct dma_fence *f;
>> -    struct dma_fence *hw_fence;
>>       unsigned i;
>>   -    if (job->hw_fence.ops == NULL)
>> -        hw_fence = job->external_hw_fence;
>> -    else
>> -        hw_fence = &job->hw_fence;
>> -
>>       /* use sched fence if available */
>> -    f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence;
>> +    f = job->base.s_fence ? &job->base.s_fence->finished :  
>> &job->hw_fence;
>>       for (i = 0; i < job->num_ibs; ++i)
>>           amdgpu_ib_free(ring->adev, &job->ibs[i], f);
>>   }
>> @@ -157,11 +151,7 @@ static void amdgpu_job_free_cb(struct 
>> drm_sched_job *s_job)
>>       amdgpu_sync_free(&job->sync);
>>       amdgpu_sync_free(&job->sched_sync);
>>   -    /* only put the hw fence if has embedded fence */
>> -    if (job->hw_fence.ops != NULL)
>> -        dma_fence_put(&job->hw_fence);
>> -    else
>> -        kfree(job);
>> +    dma_fence_put(&job->hw_fence);
>>   }
>>     void amdgpu_job_free(struct amdgpu_job *job)
>> @@ -170,11 +160,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
>>       amdgpu_sync_free(&job->sync);
>>       amdgpu_sync_free(&job->sched_sync);
>>   -    /* only put the hw fence if has embedded fence */
>> -    if (job->hw_fence.ops != NULL)
>> -        dma_fence_put(&job->hw_fence);
>> -    else
>> -        kfree(job);
>> +    dma_fence_put(&job->hw_fence);
>>   }
>>     int amdgpu_job_submit(struct amdgpu_job *job, struct 
>> drm_sched_entity *entity,
>> @@ -204,15 +190,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job 
>> *job, struct amdgpu_ring *ring,
>>       int r;
>>         job->base.sched = &ring->sched;
>> -    r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
>> -    /* record external_hw_fence for direct submit */
>> -    job->external_hw_fence = dma_fence_get(*fence);
>> +    r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
>> +
>>       if (r)
>>           return r;
>>         amdgpu_job_free(job);
>> -    dma_fence_put(*fence);
>> -
>>       return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> index d599c0540b46..babc0af751c2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> @@ -50,7 +50,6 @@ struct amdgpu_job {
>>       struct amdgpu_sync    sync;
>>       struct amdgpu_sync    sched_sync;
>>       struct dma_fence    hw_fence;
>> -    struct dma_fence    *external_hw_fence;
>>       uint32_t        preamble_status;
>>       uint32_t                preemption_status;
>>       bool                    vm_needs_flush;
>


More information about the amd-gfx mailing list