[PATCH 2/2] drm/amdgpu: cleanup amdgpu_vm_flush v7

Thu May 15 07:41:34 UTC 2025

On 5/9/2025 12:03 AM, Alex Deucher wrote:
> On Thu, May 8, 2025 at 8:05 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> Check if the cleaner shader should run directly.
>>
>> While at it remove amdgpu_vm_need_pipeline_sync(), we also check again
>> if the VMID has seen a GPU reset since last use and the gds switch
>> setiing can be handled more simply as well.
>>
>> Also remove some duplicate checks and re-order and document the code.
>>
>> v2: restructure the while function
>> v3: fix logic error pointed out by Srini
>> v4: fix typo in comment, fix crash caused by incorrect check
>> v5: once more fix the logic
>> v6: separate cleaner shader checks as suggested by Srini
>> v7: re-order incorrect check
>> v8: separate the revert
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Reviewed-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 106 ++++++++++---------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   5 +-
>>   3 files changed, 46 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 5eab1c1a380c..30b58772598c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -189,10 +189,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>          }
>>
>>          need_ctx_switch = ring->current_ctx != fence_ctx;
>> -       if (ring->funcs->emit_pipeline_sync && job &&
>> -           ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
>> -            (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
>> -            amdgpu_vm_need_pipeline_sync(ring, job))) {
>> +       if ((job && (tmp = amdgpu_sync_get_fence(&job->explicit_sync))) ||
>> +            (amdgpu_sriov_vf(adev) && need_ctx_switch)) {
>>                  need_pipe_sync = true;
>>
>>                  if (tmp)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0a80c011e678..31c423663b54 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -707,37 +707,6 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
>>          }
>>   }
>>
>> -/**
>> - * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
>> - *
>> - * @ring: ring on which the job will be submitted
>> - * @job: job to submit
>> - *
>> - * Returns:
>> - * True if sync is needed.
>> - */
>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>> -                                 struct amdgpu_job *job)
>> -{
>> -       struct amdgpu_device *adev = ring->adev;
>> -       unsigned vmhub = ring->vm_hub;
>> -       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>> -
>> -       if (job->vmid == 0)
>> -               return false;
>> -
>> -       if (job->vm_needs_flush || ring->has_compute_vm_bug)
>> -               return true;
>> -
>> -       if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
>> -               return true;
>> -
>> -       if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
>> -               return true;
>> -
>> -       return false;
>> -}
>> -
>>   /**
>>    * amdgpu_vm_flush - hardware flush the vm
>>    *
>> @@ -758,44 +727,52 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>          unsigned vmhub = ring->vm_hub;
>>          struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>>          struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
>> -       bool spm_update_needed = job->spm_update_needed;
>> -       bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>> -               job->gds_switch_needed;
>> -       bool vm_flush_needed = job->vm_needs_flush;
>> -       bool cleaner_shader_needed = false;
>> -       bool pasid_mapping_needed = false;
>> -       struct dma_fence *fence = NULL;
>> +       bool gds_switch_needed, vm_flush_needed, spm_update_needed,
>> +            cleaner_shader_needed, pasid_mapping_needed;
> Would be good to document what all of these flags are used for.  E.g
>
> /* need_pipe_sync - if set, we wait for the last fence on the ring to
> signal before executing more commands
>   * cleaner_shader_needed - if set we emit the cleaner shader to clean
> up GPRs and LDS before a new command is submitted
>   * etc.
>   */
>
>> +       struct dma_fence *fence;
>>          unsigned int patch;
>>          int r;
>>
>> +       /* First of all figure out what needs to be done */
>>          if (amdgpu_vmid_had_gpu_reset(adev, id)) {
> Please add a comment here to explain why we set all of these to true
> if we had a GPU reset.
>
>> +               need_pipe_sync = true;
>>                  gds_switch_needed = true;
>>                  vm_flush_needed = true;
>>                  pasid_mapping_needed = true;
>>                  spm_update_needed = true;
>> +               cleaner_shader_needed = true;

Hi Christian,

may I please have your thoughts onto this. ie., "cleaner_shader_needed = 
true;" wrt GPU reset
ie., why do we expect "cleaner_shader_needed" to be emitted during "GPU 
reset" process?,
because even in normal bootup case, currently we go and enable
process_isolation/trigger cleaner shader manually, only as and when
needed.

Best regards,
Srini

>> +       } else {
> Would be good to document all of these cases as well.
>
>> +               gds_switch_needed = job->gds_switch_needed;
>> +               vm_flush_needed = job->vm_needs_flush;
>> +               mutex_lock(&id_mgr->lock);
>> +               pasid_mapping_needed = id->pasid != job->pasid ||
>> +                       !id->pasid_mapping ||
>> +                       !dma_fence_is_signaled(id->pasid_mapping);
>> +               mutex_unlock(&id_mgr->lock);
>> +               spm_update_needed = job->spm_update_needed;
> E.g.:
> /* The spearhead marks the first submission from a new client.  We
> need to run the cleaner shader
>   * if it is requested by the job and we have a new spearhead so that
> we clean up before it runs.
>   */
>
>> +               cleaner_shader_needed = job->run_cleaner_shader &&
>> +                       job->base.s_fence && &job->base.s_fence->scheduled ==
>> +                       isolation->spearhead;
> E.g.,
> /* This will cause the queue to wait for the current fence on the ring
> to signal before new work executes (wait for idle).
>   * This is needed as a workaround for some hardware
> (ring->has_compute_vm_bug), if we are updating
>   * the vmid or page tables (vm_flush_needed), if we need to emit the
> cleaner shader (which must execute while the
>   * queue is idle), or if the job uses gds and we need to update the
> gds mappings (gds_switch_needed).
>   */
>
>> +               need_pipe_sync |= ring->has_compute_vm_bug || vm_flush_needed ||
>> +                       cleaner_shader_needed || gds_switch_needed;
>>          }
>>
>> -       mutex_lock(&id_mgr->lock);
>> -       if (id->pasid != job->pasid || !id->pasid_mapping ||
>> -           !dma_fence_is_signaled(id->pasid_mapping))
>> -               pasid_mapping_needed = true;
>> -       mutex_unlock(&id_mgr->lock);
>> -
>> +       /* Then check the pre-requisites */
>> +       need_pipe_sync &= !!ring->funcs->emit_pipeline_sync;
>>          gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>          vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
>>                          job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>          pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>                  ring->funcs->emit_wreg;
>> -
>> -       cleaner_shader_needed = job->run_cleaner_shader &&
>> -               adev->gfx.enable_cleaner_shader &&
>> -               ring->funcs->emit_cleaner_shader && job->base.s_fence &&
>> -               &job->base.s_fence->scheduled == isolation->spearhead;
>> +       spm_update_needed &= !!adev->gfx.rlc.funcs->update_spm_vmid;
>> +       cleaner_shader_needed &= adev->gfx.enable_cleaner_shader &&
>> +               ring->funcs->emit_cleaner_shader;
>>
>>          if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
>> -           !cleaner_shader_needed)
>> +           !cleaner_shader_needed && !spm_update_needed)
>>                  return 0;
>>
>> +       /* Then actually prepare the submission frame */
>>          amdgpu_ring_ib_begin(ring);
>>          if (ring->funcs->init_cond_exec)
>>                  patch = amdgpu_ring_init_cond_exec(ring,
>> @@ -815,23 +792,34 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>          if (pasid_mapping_needed)
>>                  amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
>>
>> -       if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
>> +       if (spm_update_needed)
>>                  adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
>>
>> -       if (ring->funcs->emit_gds_switch &&
>> -           gds_switch_needed) {
>> +       if (gds_switch_needed)
>>                  amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
>>                                              job->gds_size, job->gws_base,
>>                                              job->gws_size, job->oa_base,
>>                                              job->oa_size);
>> -       }
>>
>>          if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>>                  r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>>                  if (r)
>>                          return r;
>> +       } else {
>> +               fence = NULL;
>> +       }
>> +
>> +       amdgpu_ring_patch_cond_exec(ring, patch);
>> +
>> +       /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> +       if (ring->funcs->emit_switch_buffer) {
>> +               amdgpu_ring_emit_switch_buffer(ring);
>> +               amdgpu_ring_emit_switch_buffer(ring);
>>          }
>>
>> +       amdgpu_ring_ib_end(ring);
>> +
>> +       /* And finally remember what the ring has executed */
>>          if (vm_flush_needed) {
>>                  mutex_lock(&id_mgr->lock);
>>                  dma_fence_put(id->last_flush);
>> @@ -861,16 +849,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>                  mutex_unlock(&adev->enforce_isolation_mutex);
>>          }
>>          dma_fence_put(fence);
>> -
>> -       amdgpu_ring_patch_cond_exec(ring, patch);
>> -
>> -       /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> -       if (ring->funcs->emit_switch_buffer) {
>> -               amdgpu_ring_emit_switch_buffer(ring);
>> -               amdgpu_ring_emit_switch_buffer(ring);
>> -       }
>> -
>> -       amdgpu_ring_ib_end(ring);
>>          return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index f3ad687125ad..c9578b7f670c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -498,7 +498,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>                         struct ww_acquire_ctx *ticket,
>>                         int (*callback)(void *p, struct amdgpu_bo *bo),
>>                         void *param);
>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>> +                   bool need_pipe_sync);
>>   int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>                            struct amdgpu_vm *vm, bool immediate);
>>   int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> @@ -559,8 +560,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>>                             uint32_t fragment_size_default, unsigned max_level,
>>                             unsigned max_bits);
>>   int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>> -bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>> -                                 struct amdgpu_job *job);
>>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>
>>   struct amdgpu_task_info *
>> --
>> 2.34.1
>>