[PATCH] drm/amdgpu: fix fundamental suspend/resume issue
Deucher, Alexander
Alexander.Deucher at amd.com
Wed May 10 18:59:02 UTC 2017
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Wednesday, May 10, 2017 2:09 PM
> To: amd-gfx at lists.freedesktop.org
> Subject: [PATCH] drm/amdgpu: fix fundamental suspend/resume issue
>
> From: Christian König <christian.koenig at amd.com>
>
> Reinitializing the VM manager during suspend/resume is a very very bad
> idea since all the VMs are still active and kicking.
>
> This can lead to random VM faults after resume when new processes
> become the same client ID assigned.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22
> +++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 15 ++-------------
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 15 ++-------------
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 15 ++-------------
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++--------------
> 6 files changed, 30 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ed97a2e..9803392 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -790,6 +790,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device
> *adev, unsigned vmhub,
> struct amdgpu_vm_id_manager *id_mgr = &adev-
> >vm_manager.id_mgr[vmhub];
> struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
>
> + atomic64_set(&id->owner, 0);
> id->gds_base = 0;
> id->gds_size = 0;
> id->gws_base = 0;
> @@ -799,6 +800,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device
> *adev, unsigned vmhub,
> }
>
> /**
> + * amdgpu_vm_reset_all_id - reset VMID to zero
> + *
> + * @adev: amdgpu device structure
> + *
> + * Reset VMID to force flush on next use
> + */
> +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
> +{
> + unsigned i, j;
> +
> + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> + struct amdgpu_vm_id_manager *id_mgr =
> + &adev->vm_manager.id_mgr[i];
> +
> + for (j = 1; j < id_mgr->num_ids; ++j)
> + amdgpu_vm_reset_id(adev, i, j);
> + }
> +}
> +
> +/**
> * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
> *
> * @vm: requested vm
> @@ -2393,7 +2414,6 @@ void amdgpu_vm_manager_init(struct
> amdgpu_device *adev)
> for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
> adev->vm_manager.seqno[i] = 0;
>
> -
> atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
> atomic64_set(&adev->vm_manager.client_counter, 0);
> spin_lock_init(&adev->vm_manager.prt_lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index abc0bab..d62547d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -209,6 +209,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm,
> struct amdgpu_ring *ring,
> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
> void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
> unsigned vmid);
> +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
> int amdgpu_vm_update_directories(struct amdgpu_device *adev,
> struct amdgpu_vm *vm);
> int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index a572979..d860939 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - if (adev->vm_manager.enabled) {
> - gmc_v6_0_vm_fini(adev);
> - adev->vm_manager.enabled = false;
> - }
> gmc_v6_0_hw_fini(adev);
>
> return 0;
> @@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle)
> if (r)
> return r;
>
> - if (!adev->vm_manager.enabled) {
> - r = gmc_v6_0_vm_init(adev);
> - if (r) {
> - dev_err(adev->dev, "vm manager initialization failed
> (%d).\n", r);
> - return r;
> - }
> - adev->vm_manager.enabled = true;
> - }
> + amdgpu_vm_reset_all_ids(adev);
>
> - return r;
> + return 0;
> }
>
> static bool gmc_v6_0_is_idle(void *handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index a9083a1..2750e5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - if (adev->vm_manager.enabled) {
> - gmc_v7_0_vm_fini(adev);
> - adev->vm_manager.enabled = false;
> - }
> gmc_v7_0_hw_fini(adev);
>
> return 0;
> @@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle)
> if (r)
> return r;
>
> - if (!adev->vm_manager.enabled) {
> - r = gmc_v7_0_vm_init(adev);
> - if (r) {
> - dev_err(adev->dev, "vm manager initialization failed
> (%d).\n", r);
> - return r;
> - }
> - adev->vm_manager.enabled = true;
> - }
> + amdgpu_vm_reset_all_ids(adev);
>
> - return r;
> + return 0;
> }
>
> static bool gmc_v7_0_is_idle(void *handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 4ac9978..f56b408 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - if (adev->vm_manager.enabled) {
> - gmc_v8_0_vm_fini(adev);
> - adev->vm_manager.enabled = false;
> - }
> gmc_v8_0_hw_fini(adev);
>
> return 0;
> @@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle)
> if (r)
> return r;
>
> - if (!adev->vm_manager.enabled) {
> - r = gmc_v8_0_vm_init(adev);
> - if (r) {
> - dev_err(adev->dev, "vm manager initialization failed
> (%d).\n", r);
> - return r;
> - }
> - adev->vm_manager.enabled = true;
> - }
> + amdgpu_vm_reset_all_ids(adev);
>
> - return r;
> + return 0;
> }
>
> static bool gmc_v8_0_is_idle(void *handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b9f11fa..ae8fd91 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -797,10 +797,6 @@ static int gmc_v9_0_suspend(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> - if (adev->vm_manager.enabled) {
> - gmc_v9_0_vm_fini(adev);
> - adev->vm_manager.enabled = false;
> - }
> gmc_v9_0_hw_fini(adev);
>
> return 0;
> @@ -815,17 +811,9 @@ static int gmc_v9_0_resume(void *handle)
> if (r)
> return r;
>
> - if (!adev->vm_manager.enabled) {
> - r = gmc_v9_0_vm_init(adev);
> - if (r) {
> - dev_err(adev->dev,
> - "vm manager initialization failed (%d).\n", r);
> - return r;
> - }
> - adev->vm_manager.enabled = true;
> - }
> + amdgpu_vm_reset_all_ids(adev);
>
> - return r;
> + return 0;
> }
>
> static bool gmc_v9_0_is_idle(void *handle)
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list