[PATCH 2/4] drm/amdgpu: add work function for GPU reset
Christian König
christian.koenig at amd.com
Mon Jan 24 16:49:56 UTC 2022
Am 24.01.22 um 17:46 schrieb Sharma, Shashank:
>
>
> On 1/24/2022 8:17 AM, Christian König wrote:
>> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>>> From: Shashank Sharma <shashank.sharma at amd.com>
>>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>>
>>> This patch adds a new work function, which will get scheduled
>>> in the event of a GPU reset, and will send a uevent to indicate the same.
>>> The userspace can do some post-processing work like collecting data
>>> from a trace event.
>>>
>>> Cc: Alexander Deucher <alexander.deucher at amd.com>
>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>> 2 files changed, 16 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 269437b01328..79192f43bb71 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>> struct work_struct xgmi_reset_work;
>>> struct list_head reset_list;
>>>
>>> + struct work_struct gpu_reset_work;
>>
>> This needs a different name. "gpu_reset_work" would indicate that it
>> does the GPU reset, but this really only signals the reset to userspace.
>>
> Agree, let me come back with something like gpu_reset_housekeeping or
> something less weird than that :)
How about send_gpu_reset_signal ?
Christian.
>
>> Christian.
>>
>>> +
>>> long gfx_timeout;
>>> long sdma_timeout;
>>> long video_timeout;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index af9bdf16eefd..e29e58240869 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -72,6 +72,7 @@
>>> #include <linux/pm_runtime.h>
>>>
>>> #include <drm/drm_drv.h>
>>> +#include <drm/drm_sysfs.h>
>>>
>>> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
>>> return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>> }
>>>
>>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>>> +{
>>> + struct amdgpu_device *adev =
>>> + container_of(__work, struct amdgpu_device, gpu_reset_work);
>>> +
>>> + /*
>>> + * Inform userspace that a GPU reset happened, and it should collect
>>> + * data from the trace event.
>>> + */
>>> + drm_sysfs_gpu_reset_event(&adev->ddev);
>>> +}
>>> +
>>> static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>> {
>>> struct amdgpu_device *adev =
>>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>> amdgpu_device_delay_enable_gfx_off);
>>>
>>> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>> + INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>>
>>> adev->gfx.gfx_off_req_count = 1;
>>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>
More information about the amd-gfx
mailing list