[PATCH] drm/amdgpu: cleanup amdgpu_vm_flush v5

SRINIVASAN SHANMUGAM srinivasan.shanmugam at amd.com
Thu Apr 10 03:52:05 UTC 2025


On 4/9/2025 7:16 PM, SRINIVASAN SHANMUGAM wrote:
>
> On 4/9/2025 7:11 PM, SRINIVASAN SHANMUGAM wrote:
>>
>> On 4/9/2025 6:45 PM, SRINIVASAN SHANMUGAM wrote:
>>>
>>> On 4/9/2025 4:15 PM, Christian König wrote:
>>>> This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. It turned
>>>> out that this has some negative consequences for some workloads.
>>>> Instead, directly check if the cleaner shader should run.
>>>>
>>>> While at it, remove amdgpu_vm_need_pipeline_sync(). We also check again
>>>> if the VMID has seen a GPU reset since its last use, and the GDS switch
>>>> setting can be handled more simply as well.
>>>>
>>>> Also remove some duplicate checks and re-order and document the code.
>>>>
>>>> v2: restructure the whole function
>>>> v3: fix logic error pointed out by Srini
>>>> v4: fix typo in comment, fix crash caused by incorrect check
>>>> v5: once more fix the logic
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  6 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94 ++++++++++----------------
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 +-
>>>>   3 files changed, 39 insertions(+), 66 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>> index 802743efa3b3..30b58772598c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>>>       }
>>>>         need_ctx_switch = ring->current_ctx != fence_ctx;
>>>> -    if (ring->funcs->emit_pipeline_sync && job &&
>>>> -        ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>>>> -         need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
>>>> -
>>>> +    if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
>>>
>>>
>>> Direct assignment inside an if condition may not be allowed by the
>>> kernel coding style (checkpatch flags it); maybe we can split this
>>> logic up, something like below?
>>>
>>> /* Check if job is present and get the fence */
>>> if (job) {
>>>     tmp = amdgpu_sync_get_fence(&job->explicit_sync);
>>> }
>>>
>>> /* Check if pipe sync is needed */
>>> if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>>     need_pipe_sync = true;
>>>
>>>
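>>> A sketch of how the split-out version could read in full (my
>>> illustration only; it assumes tmp is the struct dma_fence * already
>>> declared in amdgpu_ib_schedule() and that it starts out as NULL, so
>>> the check below is well defined):
>>>
>>> 	struct dma_fence *tmp = NULL;
>>>
>>> 	/* Fetch the explicit sync fence, if any */
>>> 	if (job)
>>> 		tmp = amdgpu_sync_get_fence(&job->explicit_sync);
>>>
>>> 	/* Explicit fence or SRIOV context switch needs a pipe sync */
>>> 	if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>> 		need_pipe_sync = true;
>>> 		/* the existing "if (tmp) ..." handling continues unchanged */
>>> 	}
>>>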
>>>> +         (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>>>           need_pipe_sync = true;
>>>>             if (tmp)
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index b5ddfcbbc9fc..8e99dbd70968 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>>>       }
>>>>   }
>>>>   -/**
>>>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>>>> - *
>>>> - * @ring: ring on which the job will be submitted
>>>> - * @job: job to submit
>>>> - *
>>>> - * Returns:
>>>> - * True if sync is needed.
>>>> - */
>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>> -                  struct amdgpu_job *job)
>>>> -{
>>>> -    struct amdgpu_device *adev = ring->adev;
>>>> -    unsigned vmhub = ring->vm_hub;
>>>> -    struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>> -
>>>> -    if (job->vmid == 0)
>>>> -        return false;
>>>> -
>>>> -    if (job->vm_needs_flush || ring->has_compute_vm_bug)
>>>> -        return true;
>>>> -
>>>> -    if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>>>> -        return true;
>>>> -
>>>> -    if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>>>> -        return true;
>>>> -
>>>> -    return false;
>>>> -}
>>>> -
>>>>   /**
>>>>    * amdgpu_vm_flush - hardware flush the vm
>>>>    *
>>>> @@ -647,43 +616,49 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>       unsigned vmhub = ring->vm_hub;
>>>>       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>>       struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>>> -    bool spm_update_needed = job->spm_update_needed;
>>>> -    bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>>>> -        job->gds_switch_needed;
>>>> -    bool vm_flush_needed = job->vm_needs_flush;
>>>> -    bool cleaner_shader_needed = false;
>>>> -    bool pasid_mapping_needed = false;
>>>> -    struct dma_fence *fence = NULL;
>>>> +    bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>>>> +         cleaner_shader_needed
>>>
>>>
>>> I think we should initialize "cleaner_shader_needed" here, i.e.
>>> cleaner_shader_needed = false?
>>>
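>>> A minimal sketch of the initialized declaration (illustration only,
>>> using the variables from the hunk above):
>>>
>>> 	bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>>> 	     cleaner_shader_needed = false, pasid_mapping_needed;
>>>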
>>
>> Or, somehow, move the computation below up to here? (A reordered
>> sketch follows further down, after the need_pipe_sync lines.)
>>
>> "     cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>          ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>          &job->base.s_fence->scheduled == isolation->spearhead;"?
>>
>>>
>>>> , pasid_mapping_needed;
>>>> +    struct dma_fence *fence;
>>>>       unsigned int patch;
>>>>       int r;
>>>>   +    /* First of all figure out what needs to be done */
>>>>       if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>>> +        need_pipe_sync = true;
>>>>           gds_switch_needed = true;
>>>>           vm_flush_needed = true;
>>>>           pasid_mapping_needed = true;
>>>>           spm_update_needed = true;
>>>> +    } else {
>>>> +        gds_switch_needed = job->gds_switch_needed;
>>>> +        vm_flush_needed = job->vm_needs_flush;
>>>> +        mutex_lock(&id_mgr->lock);
>>>> +        pasid_mapping_needed = id->pasid != job->pasid ||
>>>> +            !id->pasid_mapping ||
>>>> +            !dma_fence_is_signaled(id->pasid_mapping);
>>>> +        mutex_unlock(&id_mgr->lock);
>>>> +        spm_update_needed = job->spm_update_needed;
>>>> +        need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
>>>> +            cleaner_shader_needed
>
> Sorry, I meant here please: "cleaner_shader_needed" is read by the
> need_pipe_sync computation above before it is ever assigned, so this
> needs to move up before that point:
>
> "     cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>          ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>          &job->base.s_fence->scheduled == isolation->spearhead;"?
>
>
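> I.e. something like this, reshuffling only lines already in this hunk
> (a sketch; the isolation pointer is assumed to be set up earlier in the
> function, and the pasid/spm handling is elided):
>
> 	/* Compute this first, the else branch below reads it */
> 	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
> 		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
> 		&job->base.s_fence->scheduled == isolation->spearhead;
>
> 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
> 		/* force all the flags, as in the patch */
> 	} else {
> 		gds_switch_needed = job->gds_switch_needed;
> 		vm_flush_needed = job->vm_needs_flush;
> 		need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
> 			cleaner_shader_needed || gds_switch_needed;
> 	}
>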
>>>> || gds_switch_needed;
>>>>       }
>>>>   -    mutex_lock(&id_mgr->lock);
>>>> -    if (id->pasid != job->pasid || !id->pasid_mapping ||
>>>> -        !dma_fence_is_signaled(id->pasid_mapping))
>>>> -        pasid_mapping_needed = true;
>>>> -    mutex_unlock(&id_mgr->lock);
>>>> -
>>>> +    need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>>>>       gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>>>       vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>>>               job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>>>       pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>>>           ring->funcs->emit_wreg;
>>>> +    spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
>>>>         cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>>>           ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>>>           &job->base.s_fence->scheduled == isolation->spearhead;
>>>>         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>>>> -        !cleaner_shader_needed)
>>>> +        !cleaner_shader_needed && !spm_update_needed)


Do we need to explicitly add this "&& !spm_update_needed" check here
along with the other checks, please? Because the pipeline sync is
independent of the SPM update?


>>>>           return 0;
>>>>   +    /* Then actually prepare the submission frame */
>>>>       amdgpu_ring_ib_begin(ring);
>>>>       if (ring->funcs->init_cond_exec)
>>>>           patch = amdgpu_ring_init_cond_exec(ring,
>>>> @@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>       if (pasid_mapping_needed)
>>>>           amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>>>>   -    if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>>>> +    if (spm_update_needed)
>>>>           adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>>>   -    if (ring->funcs->emit_gds_switch &&
>>>> -        gds_switch_needed) {
>>>> +    if (gds_switch_needed)
>>>>           amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>>>                           job->gds_size, job->gws_base,
>>>>                           job->gws_size, job->oa_base,
>>>>                           job->oa_size);
>>>> -    }
>>>>         if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>>>           r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>>>           if (r)
>>>>               return r;
>>>> +    } else {
>>>> +        fence = NULL;
>>>> +    }
>>>> +
>>>> +    amdgpu_ring_patch_cond_exec(ring, patch);
>>>> +
>>>> +    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>> +    if (ring->funcs->emit_switch_buffer) {
>>>> +        amdgpu_ring_emit_switch_buffer(ring);
>>>> +        amdgpu_ring_emit_switch_buffer(ring);
>>>>       }
>>>>   +    amdgpu_ring_ib_end(ring);
>>>> +
>>>> +    /* And finally remember what the ring has executed */
>>>>       if (vm_flush_needed) {
>>>>           mutex_lock(&id_mgr->lock);
>>>>           dma_fence_put(id->last_flush);
>>>> @@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>           mutex_unlock(&adev->enforce_isolation_mutex);
>>>>       }
>>>>       dma_fence_put(fence);
>>>> -
>>>> -    amdgpu_ring_patch_cond_exec(ring, patch);
>>>> -
>>>> -    /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>> -    if (ring->funcs->emit_switch_buffer) {
>>>> -        amdgpu_ring_emit_switch_buffer(ring);
>>>> -        amdgpu_ring_emit_switch_buffer(ring);
>>>> -    }
>>>> -
>>>> -    amdgpu_ring_ib_end(ring);
>>>>       return 0;
>>>>   }
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index daa2f9b33620..e9ecdb96bafa 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>                  struct ww_acquire_ctx *ticket,
>>>>                  int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>                  void *param);
>>>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>> +            bool need_pipe_sync);
>>>>   int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>>>                 struct amdgpu_vm *vm, bool immediate);
>>>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>> @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>>>>                  uint32_t fragment_size_default, unsigned max_level,
>>>>                  unsigned max_bits);
>>>>   int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>> -                  struct amdgpu_job *job);
>>>>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>>>     struct amdgpu_task_info *

