[PATCH] drm/amdgpu: Always emit GDS switch when GDS/GWS/OA is used
Christian König
christian.koenig at amd.com
Fri Jul 7 06:56:58 UTC 2023
On 07.07.23 08:28, Friedrich Vock wrote:
> During gfxoff, the per-VMID GDS registers are reset and not restored
> afterwards.
Huh? Since when? Those registers should be part of the saved ones.
Did you find that by observation?
Thanks,
Christian.
> The kernel needs to emit a GDS switch to manually update the
> GWS registers in this case. Since gfxoff can happen between any two
> submissions and the kernel has no way of knowing, emit the GDS switch
> before every submission.
>
> Fixes: 56b0989e29 ("drm/amdgpu: fix GDS/GWS/OA switch handling")
> Cc: stable at vger.kernel.org
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2530
> Signed-off-by: Friedrich Vock <friedrich.vock at gmx.de>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 22 +++++++---------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++++--
> 3 files changed, 15 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index ff1ea99292fb..de73797e9279 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -165,24 +165,17 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
> atomic_read(&adev->gpu_reset_counter);
> }
>
> -/* Check if we need to switch to another set of resources */
> -static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
> - struct amdgpu_job *job)
> -{
> - return id->gds_base != job->gds_base ||
> - id->gds_size != job->gds_size ||
> - id->gws_base != job->gws_base ||
> - id->gws_size != job->gws_size ||
> - id->oa_base != job->oa_base ||
> - id->oa_size != job->oa_size;
> -}
> -
> /* Check if the id is compatible with the job */
> static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
> struct amdgpu_job *job)
> {
> return id->pd_gpu_addr == job->vm_pd_addr &&
> - !amdgpu_vmid_gds_switch_needed(id, job);
> + id->gds_base == job->gds_base &&
> + id->gds_size == job->gds_size &&
> + id->gws_base == job->gws_base &&
> + id->gws_size == job->gws_size &&
> + id->oa_base == job->oa_base &&
> + id->oa_size == job->oa_size;
> }
>
> /**
> @@ -434,7 +427,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
> list_move_tail(&id->list, &id_mgr->ids_lru);
> }
>
> - job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
> if (job->vm_needs_flush) {
> id->flushed_updates = amdgpu_vm_tlb_seq(vm);
> dma_fence_put(id->last_flush);
> @@ -503,7 +495,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> * @vmhub: vmhub type
> * @vmid: vmid number to use
> *
> - * Reset saved GDW, GWS and OA to force switch on next flush.
> + * Reset saved GDS, GWS and OA data.
> */
> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
> unsigned vmid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index a963a25ddd62..2898508b1ce4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -53,7 +53,6 @@ struct amdgpu_job {
> uint32_t preamble_status;
> uint32_t preemption_status;
> bool vm_needs_flush;
> - bool gds_switch_needed;
> bool spm_update_needed;
> uint64_t vm_pd_addr;
> unsigned vmid;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 291977b93b1d..61856040cae2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -557,6 +557,12 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
> }
> }
>
> +/* Check if the job needs a GDS switch */
> +static bool amdgpu_vm_need_gds_switch(struct amdgpu_job *job)
> +{
> + return job->gds_size || job->gws_size || job->oa_size;
> +}
> +
> /**
> * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
> *
> @@ -579,7 +585,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
> if (job->vm_needs_flush || ring->has_compute_vm_bug)
> return true;
>
> - if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
> + if (ring->funcs->emit_gds_switch && amdgpu_vm_need_gds_switch(job))
> return true;
>
> if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
> @@ -609,7 +615,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
> bool spm_update_needed = job->spm_update_needed;
> bool gds_switch_needed = ring->funcs->emit_gds_switch &&
> - job->gds_switch_needed;
> + amdgpu_vm_need_gds_switch(job);
> bool vm_flush_needed = job->vm_needs_flush;
> struct dma_fence *fence = NULL;
> bool pasid_mapping_needed = false;
> --
> 2.41.0
>
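For reference, the core of the change: before this patch the GDS switch was only emitted when the job's GDS/GWS/OA allocation differed from what the VMID had last used, so a gfxoff cycle that cleared the per-VMID registers went unnoticed; after it, the switch is emitted whenever the job uses any of the three resources at all. Below is a small standalone C sketch of that decision change (not kernel code; the structs are trimmed to just the fields the two checks actually compare).

/* Standalone sketch of the decision change; structs mirror only the
 * GDS-related fields of struct amdgpu_vmid / struct amdgpu_job. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vmid_state {
	uint32_t gds_base, gds_size;
	uint32_t gws_base, gws_size;
	uint32_t oa_base, oa_size;
};

struct job_state {
	uint32_t gds_base, gds_size;
	uint32_t gws_base, gws_size;
	uint32_t oa_base, oa_size;
};

/* Old check: switch only if the job's allocation differs from what the
 * VMID last used -- misses the case where gfxoff wiped the registers. */
static bool gds_switch_needed_old(const struct vmid_state *id,
				  const struct job_state *job)
{
	return id->gds_base != job->gds_base || id->gds_size != job->gds_size ||
	       id->gws_base != job->gws_base || id->gws_size != job->gws_size ||
	       id->oa_base  != job->oa_base  || id->oa_size  != job->oa_size;
}

/* New check: switch whenever the job uses GDS, GWS or OA at all, so the
 * registers get rewritten even if gfxoff reset them between submissions. */
static bool gds_switch_needed_new(const struct job_state *job)
{
	return job->gds_size || job->gws_size || job->oa_size;
}

int main(void)
{
	/* Same allocation as the previous submission on this VMID: the old
	 * check skips the switch, the new one still emits it. */
	struct job_state job = { .gws_base = 0, .gws_size = 1 };
	struct vmid_state id  = { .gws_base = 0, .gws_size = 1 };

	printf("old: %d, new: %d\n",
	       gds_switch_needed_old(&id, &job),
	       gds_switch_needed_new(&job));
	return 0;
}

In the kernel itself the emission still happens from amdgpu_vm_flush() via ring->funcs->emit_gds_switch, as the diff context above shows; only the condition that triggers it changes.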