[PATCH] drm/amdgpu: cleanup amdgpu_vm_flush v5
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Apr 10 13:33:51 UTC 2025
On 10.04.25 at 05:52, SRINIVASAN SHANMUGAM wrote:
>
> On 4/9/2025 7:16 PM, SRINIVASAN SHANMUGAM wrote:
>>
>> On 4/9/2025 7:11 PM, SRINIVASAN SHANMUGAM wrote:
>>>
>>> On 4/9/2025 6:45 PM, SRINIVASAN SHANMUGAM wrote:
>>>>
>>>> On 4/9/2025 4:15 PM, Christian König wrote:
>>>>> This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. It turned out
>>>>> that this has some negative consequences for some workloads. Instead, directly
>>>>> check whether the cleaner shader should run.
>>>>>
>>>>> While at it, remove amdgpu_vm_need_pipeline_sync(); we also check again
>>>>> if the VMID has seen a GPU reset since last use, and the gds switch
>>>>> setting can be handled more simply as well.
>>>>>
>>>>> Also remove some duplicate checks and re-order and document the code.
>>>>>
>>>>> v2: restructure the whole function
>>>>> v3: fix logic error pointed out by Srini
>>>>> v4: fix typo in comment, fix crash caused by incorrect check
>>>>> v5: once more fix the logic
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>>> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
>>>>> ---
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +-
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94 ++++++++++----------------
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +-
>>>>> 3 files changed, 39 insertions(+), 66 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> index 802743efa3b3..30b58772598c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>>>> }
>>>>> need_ctx_switch = ring->current_ctx != fence_ctx;
>>>>> - if (ring->funcs->emit_pipeline_sync && job &&
>>>>> - ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>>>>> - need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
>>>>> -
>>>>> + if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
>>>>
>>>>
>>>> Direct assignment in an if condition looks like it may not be allowed; maybe we can split this logic up, something like below?
You can do a direct assignment in an if condition if you put it into an extra set of parentheses, but I agree that we should clean that up at some point.
Just not in this patch, since that is unrelated.
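For reference, a minimal stand-alone sketch of that pattern (plain C, not driver code; get_fence() and the flag are made-up stand-ins):

#include <stdio.h>

static int get_fence(void)
{
	return 42; /* stand-in for something like amdgpu_sync_get_fence() */
}

int main(void)
{
	int tmp;
	int need_ctx_switch = 1;

	/*
	 * The extra parentheses around the assignment signal to the reader
	 * (and to -Wparentheses in gcc/clang) that the assignment is
	 * intentional, so the compiler accepts it without a warning.
	 */
	if ((tmp = get_fence()) || need_ctx_switch)
		printf("tmp = %d\n", tmp);

	return 0;
}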
>>>>
>>>> /* Check if job is present and get the fence */
>>>> if (job) {
>>>> tmp = amdgpu_sync_get_fence(&job->explicit_sync);
>>>> }
>>>>
>>>> /* Check if pipe sync is needed */
>>>> if ((tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch))) {
>>>> need_pipe_sync = true;
>>>>
>>>>
>>>>> + (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>>>> need_pipe_sync = true;
>>>>> if (tmp)
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index b5ddfcbbc9fc..8e99dbd70968 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>>>> }
>>>>> }
>>>>> -/**
>>>>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>>>>> - *
>>>>> - * @ring: ring on which the job will be submitted
>>>>> - * @job: job to submit
>>>>> - *
>>>>> - * Returns:
>>>>> - * True if sync is needed.
>>>>> - */
>>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>>> - struct amdgpu_job *job)
>>>>> -{
>>>>> - struct amdgpu_device *adev = ring->adev;
>>>>> - unsigned vmhub = ring->vm_hub;
>>>>> - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>>> -
>>>>> - if (job->vmid == 0)
>>>>> - return false;
>>>>> -
>>>>> - if (job->vm_needs_flush || ring->has_compute_vm_bug)
>>>>> - return true;
>>>>> -
>>>>> - if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>>>>> - return true;
>>>>> -
>>>>> - if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>>>>> - return true;
>>>>> -
>>>>> - return false;
>>>>> -}
>>>>> -
>>>>> /**
>>>>> * amdgpu_vm_flush - hardware flush the vm
>>>>> *
>>>>> @@ -647,43 +616,49 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>> unsigned vmhub = ring->vm_hub;
>>>>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>>>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>>>> - bool spm_update_needed = job->spm_update_needed;
>>>>> - bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>>>>> - job->gds_switch_needed;
>>>>> - bool vm_flush_needed = job->vm_needs_flush;
>>>>> - bool cleaner_shader_needed = false;
>>>>> - bool pasid_mapping_needed = false;
>>>>> - struct dma_fence *fence = NULL;
>>>>> + bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>>>>> + cleaner_shader_needed
>>>>
>>>>
>>>> Should we initialize "cleaner_shader_needed" here, i.e. cleaner_shader_needed = false?
>>>>
>>>
>>> Or somehow try to move the block below up to here?
>>>
>>> " cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>> ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>> &job->base.s_fence->scheduled == isolation->spearhead;"?
Oh yeah, that's a good idea, going to add that.
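Roughly like this, i.e. a sketch of the reordering based on the hunk above (not the final version), so that cleaner_shader_needed is computed before need_pipe_sync consumes it:

	/* Sketch only: compute cleaner_shader_needed up front */
	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
		&job->base.s_fence->scheduled == isolation->spearhead;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		need_pipe_sync = true;
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
		spm_update_needed = true;
	} else {
		gds_switch_needed = job->gds_switch_needed;
		vm_flush_needed = job->vm_needs_flush;
		mutex_lock(&id_mgr->lock);
		pasid_mapping_needed = id->pasid != job->pasid ||
			!id->pasid_mapping ||
			!dma_fence_is_signaled(id->pasid_mapping);
		mutex_unlock(&id_mgr->lock);
		spm_update_needed = job->spm_update_needed;
		need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
				  cleaner_shader_needed || gds_switch_needed;
	}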
>>>
>>>>
>>>>> , pasid_mapping_needed;
>>>>> + struct dma_fence *fence;
>>>>> unsigned int patch;
>>>>> int r;
>>>>> + /* First of all figure out what needs to be done */
>>>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>>>> + need_pipe_sync = true;
>>>>> gds_switch_needed = true;
>>>>> vm_flush_needed = true;
>>>>> pasid_mapping_needed = true;
>>>>> spm_update_needed = true;
>>>>> + } else {
>>>>> + gds_switch_needed = job->gds_switch_needed;
>>>>> + vm_flush_needed = job->vm_needs_flush;
>>>>> + mutex_lock(&id_mgr->lock);
>>>>> + pasid_mapping_needed = id->pasid != job->pasid ||
>>>>> + !id->pasid_mapping ||
>>>>> + !dma_fence_is_signaled(id->pasid_mapping);
>>>>> + mutex_unlock(&id_mgr->lock);
>>>>> + spm_update_needed = job->spm_update_needed;
>>>>> + need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
>>>>> + cleaner_shader_needed
>>
>> Sorry, I meant here please:
>>
>> " cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>> ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>> &job->base.s_fence->scheduled == isolation->spearhead;"?
>>
>>
>>>>> || gds_switch_needed;
>>>>> }
>>>>> - mutex_lock(&id_mgr->lock);
>>>>> - if (id->pasid != job->pasid || !id->pasid_mapping ||
>>>>> - !dma_fence_is_signaled(id->pasid_mapping))
>>>>> - pasid_mapping_needed = true;
>>>>> - mutex_unlock(&id_mgr->lock);
>>>>> -
>>>>> + need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>>>>> gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>>>> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>>>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>>>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>>>> ring->funcs->emit_wreg;
>>>>> + spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
>>>>> cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>>>> ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>>>> &job->base.s_fence->scheduled == isolation->spearhead;
>>>>> if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>>>>> - !cleaner_shader_needed)
>>>>> + !cleaner_shader_needed && !spm_update_needed)
>
>
> Do we need to explicitly add this "&& !spm_update_needed" check here along with the other checks, please? Because pipeline_sync is independent of "spm_update"?
It isn't really necessary I think, but it shouldn't hurt either. Going to add that.
Thanks,
Christian.
>
>
>>>>> return 0;
>>>>> + /* Then actually prepare the submission frame */
>>>>> amdgpu_ring_ib_begin(ring);
>>>>> if (ring->funcs->init_cond_exec)
>>>>> patch = amdgpu_ring_init_cond_exec(ring,
>>>>> @@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>> if (pasid_mapping_needed)
>>>>> amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>>>>> - if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>>>>> + if (spm_update_needed)
>>>>> adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>>>> - if (ring->funcs->emit_gds_switch &&
>>>>> - gds_switch_needed) {
>>>>> + if (gds_switch_needed)
>>>>> amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>>>> job->gds_size, job->gws_base,
>>>>> job->gws_size, job->oa_base,
>>>>> job->oa_size);
>>>>> - }
>>>>> if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>>>> r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>>>> if (r)
>>>>> return r;
>>>>> + } else {
>>>>> + fence = NULL;
>>>>> + }
>>>>> +
>>>>> + amdgpu_ring_patch_cond_exec(ring, patch);
>>>>> +
>>>>> + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>>> + if (ring->funcs->emit_switch_buffer) {
>>>>> + amdgpu_ring_emit_switch_buffer(ring);
>>>>> + amdgpu_ring_emit_switch_buffer(ring);
>>>>> }
>>>>> + amdgpu_ring_ib_end(ring);
>>>>> +
>>>>> + /* And finally remember what the ring has executed */
>>>>> if (vm_flush_needed) {
>>>>> mutex_lock(&id_mgr->lock);
>>>>> dma_fence_put(id->last_flush);
>>>>> @@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>> mutex_unlock(&adev->enforce_isolation_mutex);
>>>>> }
>>>>> dma_fence_put(fence);
>>>>> -
>>>>> - amdgpu_ring_patch_cond_exec(ring, patch);
>>>>> -
>>>>> - /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>>>> - if (ring->funcs->emit_switch_buffer) {
>>>>> - amdgpu_ring_emit_switch_buffer(ring);
>>>>> - amdgpu_ring_emit_switch_buffer(ring);
>>>>> - }
>>>>> -
>>>>> - amdgpu_ring_ib_end(ring);
>>>>> return 0;
>>>>> }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index daa2f9b33620..e9ecdb96bafa 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>> struct ww_acquire_ctx *ticket,
>>>>> int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>> void *param);
>>>>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>>>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>> + bool need_pipe_sync);
>>>>> int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>>>> struct amdgpu_vm *vm, bool immediate);
>>>>> int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>>> @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>>>>> uint32_t fragment_size_default, unsigned max_level,
>>>>> unsigned max_bits);
>>>>> int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>>>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>>>> - struct amdgpu_job *job);
>>>>> void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>>>> struct amdgpu_task_info *