[PATCH] drm/amdgpu: cleanup amdgpu_vm_flush v5
SRINIVASAN SHANMUGAM
srinivasan.shanmugam at amd.com
Wed Apr 9 13:46:06 UTC 2025
On 4/9/2025 7:11 PM, SRINIVASAN SHANMUGAM wrote:
>
> On 4/9/2025 6:45 PM, SRINIVASAN SHANMUGAM wrote:
>>
>> On 4/9/2025 4:15 PM, Christian König wrote:
>>> This reverts commit c2cc3648ba517a6c270500b5447d5a1efdad5936. It
>>> turned out that this has some negative consequences for some
>>> workloads. Instead, directly check if the cleaner shader should run.
>>>
>>> While at it, remove amdgpu_vm_need_pipeline_sync(). We also check
>>> again if the VMID has seen a GPU reset since last use, and the GDS
>>> switch setting can be handled more simply as well.
>>>
>>> Also remove some duplicate checks and re-order and document the code.
>>>
>>> v2: restructure the whole function
>>> v3: fix logic error pointed out by Srini
>>> v4: fix typo in comment, fix crash caused by incorrect check
>>> v5: once more fix the logic
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 94 ++++++++++----------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +-
>>> 3 files changed, 39 insertions(+), 66 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index 802743efa3b3..30b58772598c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>> }
>>> need_ctx_switch = ring->current_ctx != fence_ctx;
>>> - if (ring->funcs->emit_pipeline_sync && job &&
>>> - ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>>> - need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
>>> -
>>> + if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
>>
>>
>> Direct assignment in an if condition may not be allowed by the coding
>> style; maybe we can split this logic, something like below?
>>
>> /* Check if job is present and get the fence */
>> if (job)
>>         tmp = amdgpu_sync_get_fence(&job->explicit_sync);
>>
>> /* Check if pipe sync is needed */
>> if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>         need_pipe_sync = true;
>>
>>
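For completeness, an untested sketch of that split (assuming tmp starts
out as NULL, so the check stays equivalent when there is no job):

	/* Grab the explicit sync fence first, if there is a job */
	if (job)
		tmp = amdgpu_sync_get_fence(&job->explicit_sync);

	/* Then decide whether a pipeline sync is needed */
	if (tmp || (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
		need_pipe_sync = true;
		...
	}

The behaviour should be the same as the combined condition, just
without the assignment inside the if.
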
>>> + (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>> need_pipe_sync = true;
>>> if (tmp)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index b5ddfcbbc9fc..8e99dbd70968 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -596,37 +596,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>> }
>>> }
>>> -/**
>>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>>> - *
>>> - * @ring: ring on which the job will be submitted
>>> - * @job: job to submit
>>> - *
>>> - * Returns:
>>> - * True if sync is needed.
>>> - */
>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>> - struct amdgpu_job *job)
>>> -{
>>> - struct amdgpu_device *adev = ring->adev;
>>> - unsigned vmhub = ring->vm_hub;
>>> - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>> -
>>> - if (job->vmid == 0)
>>> - return false;
>>> -
>>> - if (job->vm_needs_flush || ring->has_compute_vm_bug)
>>> - return true;
>>> -
>>> - if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>>> - return true;
>>> -
>>> - if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>>> - return true;
>>> -
>>> - return false;
>>> -}
>>> -
>>> /**
>>> * amdgpu_vm_flush - hardware flush the vm
>>> *
>>> @@ -647,43 +616,49 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>> unsigned vmhub = ring->vm_hub;
>>> struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>>> - bool spm_update_needed = job->spm_update_needed;
>>> - bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>>> - job->gds_switch_needed;
>>> - bool vm_flush_needed = job->vm_needs_flush;
>>> - bool cleaner_shader_needed = false;
>>> - bool pasid_mapping_needed = false;
>>> - struct dma_fence *fence = NULL;
>>> + bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>>> + cleaner_shader_needed, pasid_mapping_needed;
>>
>>
>> I think we should initialize "cleaner_shader_needed" here, i.e.
>> cleaner_shader_needed = false?
>>
>
> Or somehow move the assignment below up to here?
>
> cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>         ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>         &job->base.s_fence->scheduled == isolation->spearhead;
>
>>
>>> + struct dma_fence *fence;
>>> unsigned int patch;
>>> int r;
>>> + /* First of all figure out what needs to be done */
>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>> + need_pipe_sync = true;
>>> gds_switch_needed = true;
>>> vm_flush_needed = true;
>>> pasid_mapping_needed = true;
>>> spm_update_needed = true;
>>> + } else {
>>> + gds_switch_needed = job->gds_switch_needed;
>>> + vm_flush_needed = job->vm_needs_flush;
>>> + mutex_lock(&id_mgr->lock);
>>> + pasid_mapping_needed = id->pasid != job->pasid ||
>>> + !id->pasid_mapping ||
>>> + !dma_fence_is_signaled(id->pasid_mapping);
>>> + mutex_unlock(&id_mgr->lock);
>>> + spm_update_needed = job->spm_update_needed;
>>> + need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
>>> + cleaner_shader_needed || gds_switch_needed;
Sorry, I meant here please:

	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
		&job->base.s_fence->scheduled == isolation->spearhead;
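To make the ordering concrete, an untested sketch of what I have in
mind (cleaner_shader_needed computed up front, before the else branch
reads it):

	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
		&job->base.s_fence->scheduled == isolation->spearhead;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		/* force everything after a GPU reset */
		...
	} else {
		...
		need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
			cleaner_shader_needed || gds_switch_needed;
	}

Otherwise need_pipe_sync reads cleaner_shader_needed before it has
been assigned.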
>>> }
>>> - mutex_lock(&id_mgr->lock);
>>> - if (id->pasid != job->pasid || !id->pasid_mapping ||
>>> - !dma_fence_is_signaled(id->pasid_mapping))
>>> - pasid_mapping_needed = true;
>>> - mutex_unlock(&id_mgr->lock);
>>> -
>>> + need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>>> gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>> vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>> ring->funcs->emit_wreg;
>>> + spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
>>> cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
>>> ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>>> &job->base.s_fence->scheduled == isolation->spearhead;
>>> if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>>> - !cleaner_shader_needed)
>>> + !cleaner_shader_needed && !spm_update_needed)
>>> return 0;
>>> + /* Then actually prepare the submission frame */
>>> amdgpu_ring_ib_begin(ring);
>>> if (ring->funcs->init_cond_exec)
>>> patch = amdgpu_ring_init_cond_exec(ring,
>>> @@ -703,23 +678,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>> if (pasid_mapping_needed)
>>> amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>>> - if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>>> + if (spm_update_needed)
>>> adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>> - if (ring->funcs->emit_gds_switch &&
>>> - gds_switch_needed) {
>>> + if (gds_switch_needed)
>>> amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>> job->gds_size, job->gws_base,
>>> job->gws_size, job->oa_base,
>>> job->oa_size);
>>> - }
>>> if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>> r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>> if (r)
>>> return r;
>>> + } else {
>>> + fence = NULL;
>>> + }
>>> +
>>> + amdgpu_ring_patch_cond_exec(ring, patch);
>>> +
>>> + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>> + if (ring->funcs->emit_switch_buffer) {
>>> + amdgpu_ring_emit_switch_buffer(ring);
>>> + amdgpu_ring_emit_switch_buffer(ring);
>>> }
>>> + amdgpu_ring_ib_end(ring);
>>> +
>>> + /* And finally remember what the ring has executed */
>>> if (vm_flush_needed) {
>>> mutex_lock(&id_mgr->lock);
>>> dma_fence_put(id->last_flush);
>>> @@ -749,16 +735,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>> mutex_unlock(&adev->enforce_isolation_mutex);
>>> }
>>> dma_fence_put(fence);
>>> -
>>> - amdgpu_ring_patch_cond_exec(ring, patch);
>>> -
>>> - /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>>> - if (ring->funcs->emit_switch_buffer) {
>>> - amdgpu_ring_emit_switch_buffer(ring);
>>> - amdgpu_ring_emit_switch_buffer(ring);
>>> - }
>>> -
>>> - amdgpu_ring_ib_end(ring);
>>> return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index daa2f9b33620..e9ecdb96bafa 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -493,7 +493,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> struct ww_acquire_ctx *ticket,
>>> int (*callback)(void *p, struct amdgpu_bo *bo),
>>> void *param);
>>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>> + bool need_pipe_sync);
>>> int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>> struct amdgpu_vm *vm, bool immediate);
>>> int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>> @@ -550,8 +551,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>>> uint32_t fragment_size_default, unsigned max_level,
>>> unsigned max_bits);
>>> int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>>> - struct amdgpu_job *job);
>>> void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>> struct amdgpu_task_info *