[PATCH 5/8] drm/amdgpu: rework how the cleaner shader is emitted v3

Fri Mar 14 04:18:48 UTC 2025

On 2/18/2025 9:43 PM, Christian König wrote:
> Instead of emitting the cleaner shader for every job which has the
> enforce_isolation flag set only emit it for the first submission from
> every client.
>
> v2: add missing NULL check
> v3: fix another NULL pointer deref
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 ++++++++++++++++++++------
>   1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c9c48b782ec1..5323dba2d688 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -643,6 +643,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   		    bool need_pipe_sync)
>   {
>   	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
>   	unsigned vmhub = ring->vm_hub;
>   	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>   	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
> @@ -650,8 +651,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
>   		job->gds_switch_needed;
>   	bool vm_flush_needed = job->vm_needs_flush;
> -	struct dma_fence *fence = NULL;
> +	bool cleaner_shader_needed = false;
>   	bool pasid_mapping_needed = false;
> +	struct dma_fence *fence = NULL;
>   	unsigned int patch;
>   	int r;
>   
> @@ -674,8 +676,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
>   		ring->funcs->emit_wreg;
>   
> +	cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
> +		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
> +		&job->base.s_fence->scheduled == isolation->spearhead;
> +
>   	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
> -	    !(job->enforce_isolation && !job->vmid))
> +	    !cleaner_shader_needed)
>   		return 0;
>   
>   	amdgpu_ring_ib_begin(ring);
> @@ -686,9 +692,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   	if (need_pipe_sync)
>   		amdgpu_ring_emit_pipeline_sync(ring);
>   
> -	if (adev->gfx.enable_cleaner_shader &&
> -	    ring->funcs->emit_cleaner_shader &&
> -	    job->enforce_isolation)
> +	if (cleaner_shader_needed)
>   		ring->funcs->emit_cleaner_shader(ring);
>   
>   	if (vm_flush_needed) {
> @@ -710,7 +714,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   					    job->oa_size);
>   	}
>   
> -	if (vm_flush_needed || pasid_mapping_needed) {
> +	if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
>   		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
>   		if (r)
>   			return r;
> @@ -732,6 +736,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>   		id->pasid_mapping = dma_fence_get(fence);
>   		mutex_unlock(&id_mgr->lock);
>   	}
> +
> +	/*
> +	 * Make sure that all other submissions wait for the cleaner shader to
> +	 * finish before we push them to the HW.
> +	 */
> +	if (cleaner_shader_needed) {
> +		mutex_lock(&adev->enforce_isolation_mutex);
Should we need to ensure spearhead is valid before putting?
if (isolation->spearhead)
         dma_fence_put(isolation->spearhead);
> +		dma_fence_put(isolation->spearhead);
> +		isolation->spearhead = dma_fence_get(fence);
> +		mutex_unlock(&adev->enforce_isolation_mutex);
> +	}
>   	dma_fence_put(fence);
>   
>   	amdgpu_ring_patch_cond_exec(ring, patch);