[PATCH] drm/amdgpu: Fix page fault and kasan warning on pci device remove.

Andrey Grodzovsky Andrey.Grodzovsky at amd.com
Wed Aug 22 14:04:31 UTC 2018



On 08/22/2018 02:57 AM, Christian König wrote:
> Am 21.08.2018 um 23:23 schrieb Andrey Grodzovsky:
>> Problem:
>> When executing echo 1 > /sys/class/drm/card0/device/remove, a KASAN warning
>> as below and a page fault happen because adev->gart.pages is already
>> freed by the time amdgpu_gart_unbind is called.
>>
>> BUG: KASAN: user-memory-access in amdgpu_gart_unbind+0x98/0x180 [amdgpu]
>> Write of size 8 at addr 0000000000003648 by task bash/1828
>> CPU: 2 PID: 1828 Comm: bash Tainted: G        W  O 4.18.0-rc1-dev+ #29
>> Hardware name: Gigabyte Technology Co., Ltd. 
>> AX370-Gaming/AX370-Gaming-CF, BIOS F3 06/19/2017
>> Call Trace:
>> dump_stack+0x71/0xab
>> kasan_report+0x109/0x390
>> amdgpu_gart_unbind+0x98/0x180 [amdgpu]
>> ttm_tt_unbind+0x43/0x60 [ttm]
>> ttm_bo_move_ttm+0x83/0x1c0 [ttm]
>> ttm_bo_handle_move_mem+0xb97/0xd00 [ttm]
>> ttm_bo_evict+0x273/0x530 [ttm]
>> ttm_mem_evict_first+0x29c/0x360 [ttm]
>> ttm_bo_force_list_clean+0xfc/0x210 [ttm]
>> ttm_bo_clean_mm+0xe7/0x160 [ttm]
>> amdgpu_ttm_fini+0xda/0x1d0 [amdgpu]
>> amdgpu_bo_fini+0xf/0x60 [amdgpu]
>> gmc_v8_0_sw_fini+0x36/0x70 [amdgpu]
>> amdgpu_device_fini+0x2d0/0x7d0 [amdgpu]
>> amdgpu_driver_unload_kms+0x6a/0xd0 [amdgpu]
>> drm_dev_unregister+0x79/0x180 [drm]
>> amdgpu_pci_remove+0x2a/0x60 [amdgpu]
>> pci_device_remove+0x5b/0x100
>> device_release_driver_internal+0x236/0x360
>> pci_stop_bus_device+0xbf/0xf0
>> pci_stop_and_remove_bus_device_locked+0x16/0x30
>> remove_store+0xda/0xf0
>> kernfs_fop_write+0x186/0x220
>>   __vfs_write+0xcc/0x330
>> vfs_write+0xe6/0x250
>> ksys_write+0xb1/0x140
>> do_syscall_64+0x77/0x1e0
>> entry_SYSCALL_64_after_hwframe+0x44/0xa9
>> RIP: 0033:0x7f66ebbb32c0
>>
>> Fix:
>> Split gmc_v{6,7,8,9}_0_gart_fini to postpone amdgpu_gart_fini until after
>> the memory managers are shut down, since gart unbind happens
>> as part of this procedure.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   1                                     |  0
>>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c |  9 ++-------
>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++--------------
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++--------------
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++--------------
>>   5 files changed, 8 insertions(+), 49 deletions(-)
>>   create mode 100644 1
>>
>> diff --git a/1 b/1
>> new file mode 100644
>> index 0000000..e69de29
>
> Good cleanup, but what the heck is that?
>
> Christian.

Yea, git add *
I will fix and push.

Andrey
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> index c14cf1c..0a0a4dc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> @@ -633,12 +633,6 @@ static void gmc_v6_0_gart_disable(struct 
>> amdgpu_device *adev)
>>       amdgpu_gart_table_vram_unpin(adev);
>>   }
>>   -static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
>> -{
>> -    amdgpu_gart_table_vram_free(adev);
>> -    amdgpu_gart_fini(adev);
>> -}
>> -
>>   static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
>>                        u32 status, u32 addr, u32 mc_client)
>>   {
>> @@ -936,8 +930,9 @@ static int gmc_v6_0_sw_fini(void *handle)
>>         amdgpu_gem_force_release(adev);
>>       amdgpu_vm_manager_fini(adev);
>> -    gmc_v6_0_gart_fini(adev);
>> +    amdgpu_gart_table_vram_free(adev);
>>       amdgpu_bo_fini(adev);
>> +    amdgpu_gart_fini(adev);
>>       release_firmware(adev->gmc.fw);
>>       adev->gmc.fw = NULL;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 0c3a161..afbadfc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -750,19 +750,6 @@ static void gmc_v7_0_gart_disable(struct 
>> amdgpu_device *adev)
>>   }
>>     /**
>> - * gmc_v7_0_gart_fini - vm fini callback
>> - *
>> - * @adev: amdgpu_device pointer
>> - *
>> - * Tears down the driver GART/VM setup (CIK).
>> - */
>> -static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
>> -{
>> -    amdgpu_gart_table_vram_free(adev);
>> -    amdgpu_gart_fini(adev);
>> -}
>> -
>> -/**
>>    * gmc_v7_0_vm_decode_fault - print human readable fault info
>>    *
>>    * @adev: amdgpu_device pointer
>> @@ -1091,8 +1078,9 @@ static int gmc_v7_0_sw_fini(void *handle)
>>         amdgpu_gem_force_release(adev);
>>       amdgpu_vm_manager_fini(adev);
>> -    gmc_v7_0_gart_fini(adev);
>> +    amdgpu_gart_table_vram_free(adev);
>>       amdgpu_bo_fini(adev);
>> +    amdgpu_gart_fini(adev);
>>       release_firmware(adev->gmc.fw);
>>       adev->gmc.fw = NULL;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 274c932..d871dae 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -969,19 +969,6 @@ static void gmc_v8_0_gart_disable(struct 
>> amdgpu_device *adev)
>>   }
>>     /**
>> - * gmc_v8_0_gart_fini - vm fini callback
>> - *
>> - * @adev: amdgpu_device pointer
>> - *
>> - * Tears down the driver GART/VM setup (CIK).
>> - */
>> -static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
>> -{
>> -    amdgpu_gart_table_vram_free(adev);
>> -    amdgpu_gart_fini(adev);
>> -}
>> -
>> -/**
>>    * gmc_v8_0_vm_decode_fault - print human readable fault info
>>    *
>>    * @adev: amdgpu_device pointer
>> @@ -1192,8 +1179,9 @@ static int gmc_v8_0_sw_fini(void *handle)
>>         amdgpu_gem_force_release(adev);
>>       amdgpu_vm_manager_fini(adev);
>> -    gmc_v8_0_gart_fini(adev);
>> +    amdgpu_gart_table_vram_free(adev);
>>       amdgpu_bo_fini(adev);
>> +    amdgpu_gart_fini(adev);
>>       release_firmware(adev->gmc.fw);
>>       adev->gmc.fw = NULL;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0bf8439..46183c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -1003,26 +1003,12 @@ static int gmc_v9_0_sw_init(void *handle)
>>       return 0;
>>   }
>>   -/**
>> - * gmc_v9_0_gart_fini - vm fini callback
>> - *
>> - * @adev: amdgpu_device pointer
>> - *
>> - * Tears down the driver GART/VM setup (CIK).
>> - */
>> -static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
>> -{
>> -    amdgpu_gart_table_vram_free(adev);
>> -    amdgpu_gart_fini(adev);
>> -}
>> -
>>   static int gmc_v9_0_sw_fini(void *handle)
>>   {
>>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>         amdgpu_gem_force_release(adev);
>>       amdgpu_vm_manager_fini(adev);
>> -    gmc_v9_0_gart_fini(adev);
>>         /*
>>       * TODO:
>> @@ -1035,7 +1021,9 @@ static int gmc_v9_0_sw_fini(void *handle)
>>       */
>>       amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
>>   +    amdgpu_gart_table_vram_free(adev);
>>       amdgpu_bo_fini(adev);
>> +    amdgpu_gart_fini(adev);
>>         return 0;
>>   }
>



More information about the amd-gfx mailing list