[PATCH] drm/amdgpu: Fix page fault and kasan warning on pci device remove.

Christian König ckoenig.leichtzumerken at gmail.com
Wed Aug 22 06:57:08 UTC 2018


Am 21.08.2018 um 23:23 schrieb Andrey Grodzovsky:
> Problem:
> When executing echo 1 > /sys/class/drm/card0/device/remove, a KASAN warning
> as below and a page fault happen because adev->gart.pages is already freed by the
> time amdgpu_gart_unbind is called.
>
> BUG: KASAN: user-memory-access in amdgpu_gart_unbind+0x98/0x180 [amdgpu]
> Write of size 8 at addr 0000000000003648 by task bash/1828
> CPU: 2 PID: 1828 Comm: bash Tainted: G        W  O      4.18.0-rc1-dev+ #29
> Hardware name: Gigabyte Technology Co., Ltd. AX370-Gaming/AX370-Gaming-CF, BIOS F3 06/19/2017
> Call Trace:
> dump_stack+0x71/0xab
> kasan_report+0x109/0x390
> amdgpu_gart_unbind+0x98/0x180 [amdgpu]
> ttm_tt_unbind+0x43/0x60 [ttm]
> ttm_bo_move_ttm+0x83/0x1c0 [ttm]
> ttm_bo_handle_move_mem+0xb97/0xd00 [ttm]
> ttm_bo_evict+0x273/0x530 [ttm]
> ttm_mem_evict_first+0x29c/0x360 [ttm]
> ttm_bo_force_list_clean+0xfc/0x210 [ttm]
> ttm_bo_clean_mm+0xe7/0x160 [ttm]
> amdgpu_ttm_fini+0xda/0x1d0 [amdgpu]
> amdgpu_bo_fini+0xf/0x60 [amdgpu]
> gmc_v8_0_sw_fini+0x36/0x70 [amdgpu]
> amdgpu_device_fini+0x2d0/0x7d0 [amdgpu]
> amdgpu_driver_unload_kms+0x6a/0xd0 [amdgpu]
> drm_dev_unregister+0x79/0x180 [drm]
> amdgpu_pci_remove+0x2a/0x60 [amdgpu]
> pci_device_remove+0x5b/0x100
> device_release_driver_internal+0x236/0x360
> pci_stop_bus_device+0xbf/0xf0
> pci_stop_and_remove_bus_device_locked+0x16/0x30
> remove_store+0xda/0xf0
> kernfs_fop_write+0x186/0x220
>   __vfs_write+0xcc/0x330
> vfs_write+0xe6/0x250
> ksys_write+0xb1/0x140
> do_syscall_64+0x77/0x1e0
> entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x7f66ebbb32c0
>
> Fix:
> Split gmc_v{6,7,8,9}_0_gart_fini to postpone amdgpu_gart_fini until after
> the memory managers are shut down, since gart unbind happens
> as part of this procedure.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   1                                     |  0
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c |  9 ++-------
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++--------------
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++--------------
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++--------------
>   5 files changed, 8 insertions(+), 49 deletions(-)
>   create mode 100644 1
>
> diff --git a/1 b/1
> new file mode 100644
> index 0000000..e69de29

Good cleanup, but what the heck is that?

Christian.

> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index c14cf1c..0a0a4dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -633,12 +633,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
>   	amdgpu_gart_table_vram_unpin(adev);
>   }
>   
> -static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
> -{
> -	amdgpu_gart_table_vram_free(adev);
> -	amdgpu_gart_fini(adev);
> -}
> -
>   static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
>   				     u32 status, u32 addr, u32 mc_client)
>   {
> @@ -936,8 +930,9 @@ static int gmc_v6_0_sw_fini(void *handle)
>   
>   	amdgpu_gem_force_release(adev);
>   	amdgpu_vm_manager_fini(adev);
> -	gmc_v6_0_gart_fini(adev);
> +	amdgpu_gart_table_vram_free(adev);
>   	amdgpu_bo_fini(adev);
> +	amdgpu_gart_fini(adev);
>   	release_firmware(adev->gmc.fw);
>   	adev->gmc.fw = NULL;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 0c3a161..afbadfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -750,19 +750,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
>   }
>   
>   /**
> - * gmc_v7_0_gart_fini - vm fini callback
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Tears down the driver GART/VM setup (CIK).
> - */
> -static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
> -{
> -	amdgpu_gart_table_vram_free(adev);
> -	amdgpu_gart_fini(adev);
> -}
> -
> -/**
>    * gmc_v7_0_vm_decode_fault - print human readable fault info
>    *
>    * @adev: amdgpu_device pointer
> @@ -1091,8 +1078,9 @@ static int gmc_v7_0_sw_fini(void *handle)
>   
>   	amdgpu_gem_force_release(adev);
>   	amdgpu_vm_manager_fini(adev);
> -	gmc_v7_0_gart_fini(adev);
> +	amdgpu_gart_table_vram_free(adev);
>   	amdgpu_bo_fini(adev);
> +	amdgpu_gart_fini(adev);
>   	release_firmware(adev->gmc.fw);
>   	adev->gmc.fw = NULL;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 274c932..d871dae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -969,19 +969,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
>   }
>   
>   /**
> - * gmc_v8_0_gart_fini - vm fini callback
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Tears down the driver GART/VM setup (CIK).
> - */
> -static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
> -{
> -	amdgpu_gart_table_vram_free(adev);
> -	amdgpu_gart_fini(adev);
> -}
> -
> -/**
>    * gmc_v8_0_vm_decode_fault - print human readable fault info
>    *
>    * @adev: amdgpu_device pointer
> @@ -1192,8 +1179,9 @@ static int gmc_v8_0_sw_fini(void *handle)
>   
>   	amdgpu_gem_force_release(adev);
>   	amdgpu_vm_manager_fini(adev);
> -	gmc_v8_0_gart_fini(adev);
> +	amdgpu_gart_table_vram_free(adev);
>   	amdgpu_bo_fini(adev);
> +	amdgpu_gart_fini(adev);
>   	release_firmware(adev->gmc.fw);
>   	adev->gmc.fw = NULL;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0bf8439..46183c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1003,26 +1003,12 @@ static int gmc_v9_0_sw_init(void *handle)
>   	return 0;
>   }
>   
> -/**
> - * gmc_v9_0_gart_fini - vm fini callback
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Tears down the driver GART/VM setup (CIK).
> - */
> -static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
> -{
> -	amdgpu_gart_table_vram_free(adev);
> -	amdgpu_gart_fini(adev);
> -}
> -
>   static int gmc_v9_0_sw_fini(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
>   	amdgpu_gem_force_release(adev);
>   	amdgpu_vm_manager_fini(adev);
> -	gmc_v9_0_gart_fini(adev);
>   
>   	/*
>   	* TODO:
> @@ -1035,7 +1021,9 @@ static int gmc_v9_0_sw_fini(void *handle)
>   	*/
>   	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
>   
> +	amdgpu_gart_table_vram_free(adev);
>   	amdgpu_bo_fini(adev);
> +	amdgpu_gart_fini(adev);
>   
>   	return 0;
>   }



More information about the amd-gfx mailing list