[PATCH V2 01/17] drm/amd/pm: do not expose implementation details to other blocks out of power

Wed Dec 1 03:33:13 UTC 2021

On 12/1/2021 7:29 AM, Quan, Evan wrote:
> [AMD Official Use Only]
> 
> 
> 
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
>> Lazar, Lijo
>> Sent: Tuesday, November 30, 2021 4:10 PM
>> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Feng, Kenneth
>> <Kenneth.Feng at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
>> Subject: Re: [PATCH V2 01/17] drm/amd/pm: do not expose implementation
>> details to other blocks out of power
>>
>>
>>
>> On 11/30/2021 1:12 PM, Evan Quan wrote:
>>> Those implementation details(whether swsmu supported, some ppt_funcs
>>> supported, accessing internal statistics ...)should be kept
>>> internally. It's not a good practice and even error prone to expose
>> implementation details.
>>>
>>> Signed-off-by: Evan Quan <evan.quan at amd.com>
>>> Change-Id: Ibca3462ceaa26a27a9145282b60c6ce5deca7752
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/aldebaran.c        |  2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 25 ++---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  6 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       | 18 +---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |  7 --
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |  5 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c       |  5 +-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  2 +-
>>>    .../gpu/drm/amd/include/kgd_pp_interface.h    |  4 +
>>>    drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 95
>> +++++++++++++++++++
>>>    drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       | 25 ++++-
>>>    drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h       |  9 +-
>>>    drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 16 ++--
>>>    13 files changed, 155 insertions(+), 64 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>> b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>> index bcfdb63b1d42..a545df4efce1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
>>> @@ -260,7 +260,7 @@ static int aldebaran_mode2_restore_ip(struct
>> amdgpu_device *adev)
>>>    	adev->gfx.rlc.funcs->resume(adev);
>>>
>>>    	/* Wait for FW reset event complete */
>>> -	r = smu_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
>>> +	r = amdgpu_dpm_wait_for_event(adev,
>> SMU_EVENT_RESET_COMPLETE, 0);
>>>    	if (r) {
>>>    		dev_err(adev->dev,
>>>    			"Failed to get response from firmware after reset\n");
>> diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> index 164d6a9e9fbb..0d1f00b24aae 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> @@ -1585,22 +1585,25 @@ static int amdgpu_debugfs_sclk_set(void *data,
>> u64 val)
>>>    		return ret;
>>>    	}
>>>
>>> -	if (is_support_sw_smu(adev)) {
>>> -		ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
>> &min_freq, &max_freq);
>>> -		if (ret || val > max_freq || val < min_freq)
>>> -			return -EINVAL;
>>> -		ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
>> (uint32_t)val, (uint32_t)val);
>>> -	} else {
>>> -		return 0;
>>> +	ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK,
>> &min_freq, &max_freq);
>>> +	if (ret == -EOPNOTSUPP) {
>>> +		ret = 0;
>>> +		goto out;
>>>    	}
>>> +	if (ret || val > max_freq || val < min_freq) {
>>> +		ret = -EINVAL;
>>> +		goto out;
>>> +	}
>>> +
>>> +	ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK,
>> (uint32_t)val, (uint32_t)val);
>>> +	if (ret)
>>> +		ret = -EINVAL;
>>>
>>> +out:
>>>    	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>>    	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>>
>>> -	if (ret)
>>> -		return -EINVAL;
>>> -
>>> -	return 0;
>>> +	return ret;
>>>    }
>>>
>>>    DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 1989f9e9379e..41cc1ffb5809 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2617,7 +2617,7 @@ static int amdgpu_device_ip_late_init(struct
>> amdgpu_device *adev)
>>>    	if (adev->asic_type == CHIP_ARCTURUS &&
>>>    	    amdgpu_passthrough(adev) &&
>>>    	    adev->gmc.xgmi.num_physical_nodes > 1)
>>> -		smu_set_light_sbr(&adev->smu, true);
>>> +		amdgpu_dpm_set_light_sbr(adev, true);
>>>
>>>    	if (adev->gmc.xgmi.num_physical_nodes > 1) {
>>>    		mutex_lock(&mgpu_info.mutex);
>>> @@ -2857,7 +2857,7 @@ static int
>> amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
>>>    	int i, r;
>>>
>>>    	if (adev->in_s0ix)
>>> -		amdgpu_gfx_state_change_set(adev,
>> sGpuChangeState_D3Entry);
>>> +		amdgpu_dpm_gfx_state_change(adev,
>> sGpuChangeState_D3Entry);
>>>
>>>    	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
>>>    		if (!adev->ip_blocks[i].status.valid)
>>> @@ -3982,7 +3982,7 @@ int amdgpu_device_resume(struct drm_device
>> *dev, bool fbcon)
>>>    		return 0;
>>>
>>>    	if (adev->in_s0ix)
>>> -		amdgpu_gfx_state_change_set(adev,
>> sGpuChangeState_D0Entry);
>>> +		amdgpu_dpm_gfx_state_change(adev,
>> sGpuChangeState_D0Entry);
>>>
>>>    	/* post card */
>>>    	if (amdgpu_device_need_post(adev)) { diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 1916ec84dd71..3d8f82dc8c97 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -615,7 +615,7 @@ int amdgpu_get_gfx_off_status(struct
>> amdgpu_device
>>> *adev, uint32_t *value)
>>>
>>>    	mutex_lock(&adev->gfx.gfx_off_mutex);
>>>
>>> -	r = smu_get_status_gfxoff(adev, value);
>>> +	r = amdgpu_dpm_get_status_gfxoff(adev, value);
>>>
>>>    	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>>
>>> @@ -852,19 +852,3 @@ int amdgpu_gfx_get_num_kcq(struct
>> amdgpu_device *adev)
>>>    	}
>>>    	return amdgpu_num_kcq;
>>>    }
>>> -
>>> -/* amdgpu_gfx_state_change_set - Handle gfx power state change set
>>> - * @adev: amdgpu_device pointer
>>> - * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
>>> -sGpuChangeState_D3Entry)
>>> - *
>>> - */
>>> -
>>> -void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum
>>> gfx_change_state state) -{
>>> -	mutex_lock(&adev->pm.mutex);
>>> -	if (adev->powerplay.pp_funcs &&
>>> -	    adev->powerplay.pp_funcs->gfx_state_change_set)
>>> -		((adev)->powerplay.pp_funcs->gfx_state_change_set(
>>> -			(adev)->powerplay.pp_handle, state));
>>> -	mutex_unlock(&adev->pm.mutex);
>>> -}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index f851196c83a5..776c886fd94a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -47,12 +47,6 @@ enum amdgpu_gfx_pipe_priority {
>>>    	AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
>>>    };
>>>
>>> -/* Argument for PPSMC_MSG_GpuChangeState */ -enum
>> gfx_change_state {
>>> -	sGpuChangeState_D0Entry = 1,
>>> -	sGpuChangeState_D3Entry,
>>> -};
>>> -
>>>    #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0
>>>    #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15
>>>
>>> @@ -410,5 +404,4 @@ int amdgpu_gfx_cp_ecc_error_irq(struct
>> amdgpu_device *adev,
>>>    uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
>>>    void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg,
>> uint32_t v);
>>>    int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); -void
>>> amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum
>> gfx_change_state state);
>>>    #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>>> index 3c623e589b79..35c4aec04a7e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>>> @@ -901,7 +901,7 @@ static void amdgpu_ras_get_ecc_info(struct
>> amdgpu_device *adev, struct ras_err_d
>>>    	 * choosing right query method according to
>>>    	 * whether smu support query error information
>>>    	 */
>>> -	ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc));
>>> +	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
>>>    	if (ret == -EOPNOTSUPP) {
>>>    		if (adev->umc.ras_funcs &&
>>>    			adev->umc.ras_funcs->query_ras_error_count)
>>> @@ -2132,8 +2132,7 @@ int amdgpu_ras_recovery_init(struct
>> amdgpu_device *adev)
>>>    		if (ret)
>>>    			goto free;
>>>
>>> -		if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num)
>>> -			adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num(&adev->smu, con-
>>> eeprom_control.ras_num_recs);
>>> +		amdgpu_dpm_send_hbm_bad_pages_num(adev,
>>> +con->eeprom_control.ras_num_recs);
>>>    	}
>>>
>>>    #ifdef CONFIG_X86_MCE_AMD
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
>>> index 6e4bea012ea4..5fed26c8db44 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
>>> @@ -97,7 +97,7 @@ int amdgpu_umc_process_ras_data_cb(struct
>> amdgpu_device *adev,
>>>    	int ret = 0;
>>>
>>>    	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
>>> -	ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc));
>>> +	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
>>>    	if (ret == -EOPNOTSUPP) {
>>>    		if (adev->umc.ras_funcs &&
>>>    		    adev->umc.ras_funcs->query_ras_error_count)
>>> @@ -160,8 +160,7 @@ int amdgpu_umc_process_ras_data_cb(struct
>> amdgpu_device *adev,
>>>    						err_data->err_addr_cnt);
>>>    			amdgpu_ras_save_bad_pages(adev);
>>>
>>> -			if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num)
>>> -				adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num(&adev->smu, con-
>>> eeprom_control.ras_num_recs);
>>> +			amdgpu_dpm_send_hbm_bad_pages_num(adev,
>>> +con->eeprom_control.ras_num_recs);
>>>    		}
>>>
>>>    		amdgpu_ras_reset_gpu(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> index deae12dc777d..329a4c89f1e6 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
>>> @@ -222,7 +222,7 @@ void
>>> kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
>>>
>>>    	len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
>>>    		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
>>> -		       atomic64_read(&dev->adev->smu.throttle_int_counter));
>>> +		       amdgpu_dpm_get_thermal_throttling_counter(dev-
>>> adev));
>>>
>>>    	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,
>> 	fifo_in, len);
>>>    }
>>> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> index 5c0867ebcfce..2e295facd086 100644
>>> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> @@ -26,6 +26,10 @@
>>>
>>>    extern const struct amdgpu_ip_block_version pp_smu_ip_block;
>>>
>>> +enum smu_event_type {
>>> +	SMU_EVENT_RESET_COMPLETE = 0,
>>> +};
>>> +
>>>    struct amd_vce_state {
>>>    	/* vce clocks */
>>>    	u32 evclk;
>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> index 08362d506534..9b332c8a0079 100644
>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> @@ -1614,3 +1614,98 @@ int amdgpu_pm_load_smu_firmware(struct
>>> amdgpu_device *adev, uint32_t *smu_versio
>>>
>>>    	return 0;
>>>    }
>>> +
>>> +int amdgpu_dpm_set_light_sbr(struct amdgpu_device *adev, bool
>> enable)
>>> +{
>>> +	return smu_set_light_sbr(&adev->smu, enable); }
>>> +
>>> +int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device
>> *adev,
>>> +uint32_t size) {
>>> +	int ret = 0;
>>> +
>>> +	if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num)
>>> +		ret = adev->smu.ppt_funcs-
>>> send_hbm_bad_pages_num(&adev->smu,
>>> +size);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
>>> +				  enum pp_clock_type type,
>>> +				  uint32_t *min,
>>> +				  uint32_t *max)
>>> +{
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	switch (type) {
>>> +	case PP_SCLK:
>>> +		return smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
>> min, max);
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>>> +}
>>> +
>>> +int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
>>> +				   enum pp_clock_type type,
>>> +				   uint32_t min,
>>> +				   uint32_t max)
>>> +{
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	switch (type) {
>>> +	case PP_SCLK:
>>> +		return smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
>> min, max);
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>>> +}
>>> +
>>> +int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev,
>>> +			      enum smu_event_type event,
>>> +			      uint64_t event_arg)
>>> +{
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	return smu_wait_for_event(&adev->smu, event, event_arg); }
>>> +
>>> +int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
>> uint32_t
>>> +*value) {
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	return smu_get_status_gfxoff(&adev->smu, value); }
>>> +
>>> +uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>>> +amdgpu_device *adev) {
>>> +	return atomic64_read(&adev->smu.throttle_int_counter);
>>> +}
>>> +
>>> +/* amdgpu_dpm_gfx_state_change - Handle gfx power state change set
>>> + * @adev: amdgpu_device pointer
>>> + * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
>>> +-sGpuChangeState_D3Entry)
>>> + *
>>> + */
>>> +void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
>>> +				 enum gfx_change_state state)
>>> +{
>>> +	mutex_lock(&adev->pm.mutex);
>>> +	if (adev->powerplay.pp_funcs &&
>>> +	    adev->powerplay.pp_funcs->gfx_state_change_set)
>>> +		((adev)->powerplay.pp_funcs->gfx_state_change_set(
>>> +			(adev)->powerplay.pp_handle, state));
>>> +	mutex_unlock(&adev->pm.mutex);
>>> +}
>>> +
>>> +int amdgpu_dpm_get_ecc_info(struct amdgpu_device *adev,
>>> +			    void *umc_ecc)
>>> +{
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>
>> In general, I don't think we need to keep this check everywhere to make
>> amdgpu_dpm_* backwards compatible.  The usage is also inconsistent. For
>> ex: amdgpu_dpm_get_thermal_throttling_counter doesn't have any
>> is_support_sw_smu check whereas amdgpu_dpm_get_ecc_info() has it.
>> There is no reason to keep adding is_support_sw_smu() check for every new
>> public API. For sure, they are not going to work with powerplay subsystem.
>>
>> I would rather prefer to leave old things and create amdgpu_smu_* for
>> anything which is supported only in smu subsystem. It's easier to read from
>> code perspective also - separate the ones which is supported by smu
>> component and not supported in older powerplay components.
>>
>> Only for the common ones that are supported in powerplay and smu, keep
>> amdgpu_dpm_*, for others preference would be to keep amdgpu_smu_*.
> [Quan, Evan] I get your point. However, then it will bring back the problem we are trying to avoid.
> That is the caller need to know whether the amdgpu_smu_* can be used. They need to know whether the swsmu framework is supported on some ASIC. >

swsmu has been for sometime. I'm suggesting to move away from 
amdgpu_dpm_* which is the legacy interface. There is no need to add new 
dpm_* APIs which are supported only in swsmu component. We only need to 
maintain the existing usage of dpm_*.

For the newer ones, let us move to component based APIs like 
amdgpu_smu_*. All the clients of swsmu are part of this component based 
architecture and they need to be aware of services of swsmu. It is 
similar to what is followed in other components like amdgpu_gmc_*, 
amdgpu_vcn* etc.

Thanks,
Lijo

> And yes, there is some inconsistent cases existing in current power code. Maybe we can create new patche(s) to fix them?
> For this patch series, I would like to avoid any real code logic change.
> 
> BR
> Evan
>>
>> Thanks,
>> Lijo
>>
>>> +	return smu_get_ecc_info(&adev->smu, umc_ecc); }
>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> index 16e3f72d31b9..7289d379a9fb 100644
>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> @@ -23,6 +23,12 @@
>>>    #ifndef __AMDGPU_DPM_H__
>>>    #define __AMDGPU_DPM_H__
>>>
>>> +/* Argument for PPSMC_MSG_GpuChangeState */ enum
>> gfx_change_state {
>>> +	sGpuChangeState_D0Entry = 1,
>>> +	sGpuChangeState_D3Entry,
>>> +};
>>> +
>>>    enum amdgpu_int_thermal_type {
>>>    	THERMAL_TYPE_NONE,
>>>    	THERMAL_TYPE_EXTERNAL,
>>> @@ -574,5 +580,22 @@ void amdgpu_dpm_enable_vce(struct
>> amdgpu_device *adev, bool enable);
>>>    void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool
>> enable);
>>>    void amdgpu_pm_print_power_states(struct amdgpu_device *adev);
>>>    int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev,
>> uint32_t
>>> *smu_version);
>>> -
>>> +int amdgpu_dpm_set_light_sbr(struct amdgpu_device *adev, bool
>>> +enable); int amdgpu_dpm_send_hbm_bad_pages_num(struct
>> amdgpu_device
>>> +*adev, uint32_t size); int amdgpu_dpm_get_dpm_freq_range(struct
>> amdgpu_device *adev,
>>> +				       enum pp_clock_type type,
>>> +				       uint32_t *min,
>>> +				       uint32_t *max);
>>> +int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
>>> +				        enum pp_clock_type type,
>>> +				        uint32_t min,
>>> +				        uint32_t max);
>>> +int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
>> smu_event_type event,
>>> +		       uint64_t event_arg);
>>> +int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
>> uint32_t
>>> +*value); uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>>> +amdgpu_device *adev); void amdgpu_dpm_gfx_state_change(struct
>> amdgpu_device *adev,
>>> +				 enum gfx_change_state state);
>>> +int amdgpu_dpm_get_ecc_info(struct amdgpu_device *adev,
>>> +			    void *umc_ecc);
>>>    #endif
>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
>>> index f738f7dc20c9..29791bb21fba 100644
>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
>>> @@ -241,11 +241,6 @@ struct smu_user_dpm_profile {
>>>    	uint32_t clk_dependency;
>>>    };
>>>
>>> -enum smu_event_type {
>>> -
>>> -	SMU_EVENT_RESET_COMPLETE = 0,
>>> -};
>>> -
>>>    #define SMU_TABLE_INIT(tables, table_id, s, a, d)	\
>>>    	do {						\
>>>    		tables[table_id].size = s;		\
>>> @@ -1412,11 +1407,11 @@ int smu_set_ac_dc(struct smu_context *smu);
>>>
>>>    int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>>>
>>> -int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value);
>>> +int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>>>
>>>    int smu_set_light_sbr(struct smu_context *smu, bool enable);
>>>
>>> -int smu_wait_for_event(struct amdgpu_device *adev, enum
>>> smu_event_type event,
>>> +int smu_wait_for_event(struct smu_context *smu, enum
>> smu_event_type
>>> +event,
>>>    		       uint64_t event_arg);
>>>    int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
>>>    int smu_stb_collect_info(struct smu_context *smu, void *buff,
>>> uint32_t size); diff --git
>> a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> index 5839918cb574..ef7d0e377965 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> @@ -100,17 +100,14 @@ static int smu_sys_set_pp_feature_mask(void
>> *handle,
>>>    	return ret;
>>>    }
>>>
>>> -int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value)
>>> +int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>>>    {
>>> -	int ret = 0;
>>> -	struct smu_context *smu = &adev->smu;
>>> +	if (!smu->ppt_funcs->get_gfx_off_status)
>>> +		return -EINVAL;
>>>
>>> -	if (is_support_sw_smu(adev) && smu->ppt_funcs-
>>> get_gfx_off_status)
>>> -		*value = smu_get_gfx_off_status(smu);
>>> -	else
>>> -		ret = -EINVAL;
>>> +	*value = smu_get_gfx_off_status(smu);
>>>
>>> -	return ret;
>>> +	return 0;
>>>    }
>>>
>>>    int smu_set_soft_freq_range(struct smu_context *smu, @@ -3167,11
>>> +3164,10 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
>>>    	.get_smu_prv_buf_details = smu_get_prv_buffer_details,
>>>    };
>>>
>>> -int smu_wait_for_event(struct amdgpu_device *adev, enum
>>> smu_event_type event,
>>> +int smu_wait_for_event(struct smu_context *smu, enum
>> smu_event_type
>>> +event,
>>>    		       uint64_t event_arg)
>>>    {
>>>    	int ret = -EINVAL;
>>> -	struct smu_context *smu = &adev->smu;
>>>
>>>    	if (smu->ppt_funcs->wait_for_event) {
>>>    		mutex_lock(&smu->mutex);
>>>