[PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
Matt Coffin
mcoffin13 at gmail.com
Mon Nov 11 23:28:41 UTC 2019
Thanks Evan. I can confirm that the linked patch resolves the issue for me.
I commented and resolved the bug as well in case other people find it.
Cheers,
Matt
On 11/11/19 2:28 AM, Quan, Evan wrote:
> Just sent out a patch which should be able to address this issue.
> https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
>
> Regards,
> Evan
>> -----Original Message-----
>> From: Matt Coffin <mcoffin13 at gmail.com>
>> Sent: Saturday, November 9, 2019 4:50 AM
>> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
>> Cc: Li, Candice <Candice.Li at amd.com>; Gui, Jack <Jack.Gui at amd.com>; Alex
>> Deucher <alexdeucher at gmail.com>
>> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>>
>> Hey guys,
>>
>>
>>
>> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
>> expert since everything I know comes from just reading through the code, so
>> this could be some kind of intended behavior, but after this patch, if you write a
>> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
>> successfully, and the result is seemingly an unrecoverable situation.
>>
>>
>>
>> I put in a report on bugzilla with dmesg logs
>> :
>> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>>
>>
>> Finding this change was the result of a bisect to find where the issue started,
>> and reverting the changes to smu_hw_fini resolved the issue.
>> Any advice on possible proper fixes?
>>
>> Thanks in advance,
>>
>> Matt
>>
>> On 9/2/19 9:44 PM, Quan, Evan wrote:
>>> These are needed for smu_reset support.
>>>
>>> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
>>> Signed-off-by: Evan Quan <evan.quan at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17
>> +++++++++++++++++
>>> drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
>>> drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
>>> 3 files changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> index d5ee13a78eb7..3cf8d944f890 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>>> return ret;
>>> }
>>>
>>> +static int smu_stop_dpms(struct smu_context *smu) {
>>> + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
>>> +
>>> static int smu_hw_fini(void *handle)
>>> {
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>>> -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>>> smu_powergate_vcn(&adev->smu, true);
>>> }
>>>
>>> + ret = smu_stop_thermal_control(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop thermal control!\n");
>>> + return ret;
>>> + }
>>> +
>>> + ret = smu_stop_dpms(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop Dpms!\n");
>>> + return ret;
>>> + }
>>> +
>>> kfree(table_context->driver_pptable);
>>> table_context->driver_pptable = NULL;
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> index b19224cb6d6d..8e4b0ad24712 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> @@ -498,6 +498,7 @@ struct smu_funcs
>>> int (*get_current_clk_freq)(struct smu_context *smu, enum
>> smu_clk_type clk_id, uint32_t *value);
>>> int (*init_max_sustainable_clocks)(struct smu_context *smu);
>>> int (*start_thermal_control)(struct smu_context *smu);
>>> + int (*stop_thermal_control)(struct smu_context *smu);
>>> int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
>> sensor,
>>> void *data, uint32_t *size);
>>> int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
>>> clk); @@ -647,6 +648,8 @@ struct smu_funcs
>>> ((smu)->ppt_funcs->set_thermal_fan_table ?
>>> (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define
>> smu_start_thermal_control(smu) \
>>> ((smu)->funcs->start_thermal_control?
>>> (smu)->funcs->start_thermal_control((smu)) : 0)
>>> +#define smu_stop_thermal_control(smu) \
>>> + ((smu)->funcs->stop_thermal_control?
>>> +(smu)->funcs->stop_thermal_control((smu)) : 0)
>>> #define smu_read_sensor(smu, sensor, data, size) \
>>> ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
>>> read_sensor((smu),
>>> (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu,
>>> sensor, data, size) \ diff --git
>>> a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> index db5e94ce54af..1a38af84394e 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> @@ -1209,6 +1209,15 @@ static int
>> smu_v11_0_start_thermal_control(struct smu_context *smu)
>>> return ret;
>>> }
>>>
>>> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
>>> + struct amdgpu_device *adev = smu->adev;
>>> +
>>> + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static uint16_t convert_to_vddc(uint8_t vid) {
>>> return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
>>> -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>>> .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>>> .init_max_sustainable_clocks =
>> smu_v11_0_init_max_sustainable_clocks,
>>> .start_thermal_control = smu_v11_0_start_thermal_control,
>>> + .stop_thermal_control = smu_v11_0_stop_thermal_control,
>>> .read_sensor = smu_v11_0_read_sensor,
>>> .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>>> .display_clock_voltage_request =
>>> smu_v11_0_display_clock_voltage_request,
>>>
More information about the amd-gfx
mailing list