[PATCH V2 01/17] drm/amd/pm: do not expose implementation details to other blocks out of power
Quan, Evan
Evan.Quan at amd.com
Wed Dec 1 07:07:03 UTC 2021
[AMD Official Use Only]
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar at amd.com>
> Sent: Wednesday, December 1, 2021 11:33 AM
> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Feng, Kenneth
> <Kenneth.Feng at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
> Subject: Re: [PATCH V2 01/17] drm/amd/pm: do not expose implementation
> details to other blocks out of power
>
>
>
> On 12/1/2021 7:29 AM, Quan, Evan wrote:
> > [AMD Official Use Only]
> >
> >
> >
> >> -----Original Message-----
> >> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> >> Lazar, Lijo
> >> Sent: Tuesday, November 30, 2021 4:10 PM
> >> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
> >> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Feng, Kenneth
> >> <Kenneth.Feng at amd.com>; Koenig, Christian
> <Christian.Koenig at amd.com>
> >> Subject: Re: [PATCH V2 01/17] drm/amd/pm: do not expose
> >> implementation details to other blocks out of power
> >>
> >>
> >>
> >> On 11/30/2021 1:12 PM, Evan Quan wrote:
> >>> Those implementation details(whether swsmu supported, some
> ppt_funcs
> >>> supported, accessing internal statistics ...)should be kept
> >>> internally. It's not a good practice and even error prone to expose
> >> implementation details.
> >>>
> >>> Signed-off-by: Evan Quan <evan.quan at amd.com>
> >>> Change-Id: Ibca3462ceaa26a27a9145282b60c6ce5deca7752
> >>> ---
> >>> drivers/gpu/drm/amd/amdgpu/aldebaran.c | 2 +-
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 25 ++---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 18 +---
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 7 --
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +-
> >>> drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 +-
> >>> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 2 +-
> >>> .../gpu/drm/amd/include/kgd_pp_interface.h | 4 +
> >>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 95
> >> +++++++++++++++++++
> >>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 25 ++++-
> >>> drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 9 +-
> >>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 16 ++--
> >>> 13 files changed, 155 insertions(+), 64 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> >>> b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> >>> index bcfdb63b1d42..a545df4efce1 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> >>> @@ -260,7 +260,7 @@ static int aldebaran_mode2_restore_ip(struct
> >> amdgpu_device *adev)
> >>> adev->gfx.rlc.funcs->resume(adev);
> >>>
> >>> /* Wait for FW reset event complete */
> >>> - r = smu_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
> >>> + r = amdgpu_dpm_wait_for_event(adev,
> >> SMU_EVENT_RESET_COMPLETE, 0);
> >>> if (r) {
> >>> dev_err(adev->dev,
> >>> "Failed to get response from firmware after reset\n");
> >> diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> index 164d6a9e9fbb..0d1f00b24aae 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> @@ -1585,22 +1585,25 @@ static int amdgpu_debugfs_sclk_set(void
> >>> *data,
> >> u64 val)
> >>> return ret;
> >>> }
> >>>
> >>> - if (is_support_sw_smu(adev)) {
> >>> - ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
> >> &min_freq, &max_freq);
> >>> - if (ret || val > max_freq || val < min_freq)
> >>> - return -EINVAL;
> >>> - ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
> >> (uint32_t)val, (uint32_t)val);
> >>> - } else {
> >>> - return 0;
> >>> + ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK,
> >> &min_freq, &max_freq);
> >>> + if (ret == -EOPNOTSUPP) {
> >>> + ret = 0;
> >>> + goto out;
> >>> }
> >>> + if (ret || val > max_freq || val < min_freq) {
> >>> + ret = -EINVAL;
> >>> + goto out;
> >>> + }
> >>> +
> >>> + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK,
> >> (uint32_t)val, (uint32_t)val);
> >>> + if (ret)
> >>> + ret = -EINVAL;
> >>>
> >>> +out:
> >>> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>>
> >>> - if (ret)
> >>> - return -EINVAL;
> >>> -
> >>> - return 0;
> >>> + return ret;
> >>> }
> >>>
> >>> DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index 1989f9e9379e..41cc1ffb5809 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -2617,7 +2617,7 @@ static int amdgpu_device_ip_late_init(struct
> >> amdgpu_device *adev)
> >>> if (adev->asic_type == CHIP_ARCTURUS &&
> >>> amdgpu_passthrough(adev) &&
> >>> adev->gmc.xgmi.num_physical_nodes > 1)
> >>> - smu_set_light_sbr(&adev->smu, true);
> >>> + amdgpu_dpm_set_light_sbr(adev, true);
> >>>
> >>> if (adev->gmc.xgmi.num_physical_nodes > 1) {
> >>> mutex_lock(&mgpu_info.mutex);
> >>> @@ -2857,7 +2857,7 @@ static int
> >> amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
> >>> int i, r;
> >>>
> >>> if (adev->in_s0ix)
> >>> - amdgpu_gfx_state_change_set(adev,
> >> sGpuChangeState_D3Entry);
> >>> + amdgpu_dpm_gfx_state_change(adev,
> >> sGpuChangeState_D3Entry);
> >>>
> >>> for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
> >>> if (!adev->ip_blocks[i].status.valid)
> >>> @@ -3982,7 +3982,7 @@ int amdgpu_device_resume(struct drm_device
> >> *dev, bool fbcon)
> >>> return 0;
> >>>
> >>> if (adev->in_s0ix)
> >>> - amdgpu_gfx_state_change_set(adev,
> >> sGpuChangeState_D0Entry);
> >>> + amdgpu_dpm_gfx_state_change(adev,
> >> sGpuChangeState_D0Entry);
> >>>
> >>> /* post card */
> >>> if (amdgpu_device_need_post(adev)) { diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> index 1916ec84dd71..3d8f82dc8c97 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> @@ -615,7 +615,7 @@ int amdgpu_get_gfx_off_status(struct
> >> amdgpu_device
> >>> *adev, uint32_t *value)
> >>>
> >>> mutex_lock(&adev->gfx.gfx_off_mutex);
> >>>
> >>> - r = smu_get_status_gfxoff(adev, value);
> >>> + r = amdgpu_dpm_get_status_gfxoff(adev, value);
> >>>
> >>> mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>>
> >>> @@ -852,19 +852,3 @@ int amdgpu_gfx_get_num_kcq(struct
> >> amdgpu_device *adev)
> >>> }
> >>> return amdgpu_num_kcq;
> >>> }
> >>> -
> >>> -/* amdgpu_gfx_state_change_set - Handle gfx power state change set
> >>> - * @adev: amdgpu_device pointer
> >>> - * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
> >>> -sGpuChangeState_D3Entry)
> >>> - *
> >>> - */
> >>> -
> >>> -void amdgpu_gfx_state_change_set(struct amdgpu_device *adev,
> enum
> >>> gfx_change_state state) -{
> >>> - mutex_lock(&adev->pm.mutex);
> >>> - if (adev->powerplay.pp_funcs &&
> >>> - adev->powerplay.pp_funcs->gfx_state_change_set)
> >>> - ((adev)->powerplay.pp_funcs->gfx_state_change_set(
> >>> - (adev)->powerplay.pp_handle, state));
> >>> - mutex_unlock(&adev->pm.mutex);
> >>> -}
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> index f851196c83a5..776c886fd94a 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> @@ -47,12 +47,6 @@ enum amdgpu_gfx_pipe_priority {
> >>> AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
> >>> };
> >>>
> >>> -/* Argument for PPSMC_MSG_GpuChangeState */ -enum
> >> gfx_change_state {
> >>> - sGpuChangeState_D0Entry = 1,
> >>> - sGpuChangeState_D3Entry,
> >>> -};
> >>> -
> >>> #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
> >>> #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
> >>>
> >>> @@ -410,5 +404,4 @@ int amdgpu_gfx_cp_ecc_error_irq(struct
> >> amdgpu_device *adev,
> >>> uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
> >>> void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg,
> >> uint32_t v);
> >>> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); -void
> >>> amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum
> >> gfx_change_state state);
> >>> #endif
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> >>> index 3c623e589b79..35c4aec04a7e 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> >>> @@ -901,7 +901,7 @@ static void amdgpu_ras_get_ecc_info(struct
> >> amdgpu_device *adev, struct ras_err_d
> >>> * choosing right query method according to
> >>> * whether smu support query error information
> >>> */
> >>> - ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc));
> >>> + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
> >>> if (ret == -EOPNOTSUPP) {
> >>> if (adev->umc.ras_funcs &&
> >>> adev->umc.ras_funcs->query_ras_error_count)
> >>> @@ -2132,8 +2132,7 @@ int amdgpu_ras_recovery_init(struct
> >> amdgpu_device *adev)
> >>> if (ret)
> >>> goto free;
> >>>
> >>> - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num)
> >>> - adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num(&adev->smu, con-
> >>> eeprom_control.ras_num_recs);
> >>> + amdgpu_dpm_send_hbm_bad_pages_num(adev,
> >>> +con->eeprom_control.ras_num_recs);
> >>> }
> >>>
> >>> #ifdef CONFIG_X86_MCE_AMD
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> >>> index 6e4bea012ea4..5fed26c8db44 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> >>> @@ -97,7 +97,7 @@ int amdgpu_umc_process_ras_data_cb(struct
> >> amdgpu_device *adev,
> >>> int ret = 0;
> >>>
> >>> kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
> >>> - ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc));
> >>> + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
> >>> if (ret == -EOPNOTSUPP) {
> >>> if (adev->umc.ras_funcs &&
> >>> adev->umc.ras_funcs->query_ras_error_count)
> >>> @@ -160,8 +160,7 @@ int amdgpu_umc_process_ras_data_cb(struct
> >> amdgpu_device *adev,
> >>> err_data->err_addr_cnt);
> >>> amdgpu_ras_save_bad_pages(adev);
> >>>
> >>> - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num)
> >>> - adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num(&adev->smu, con-
> >>> eeprom_control.ras_num_recs);
> >>> + amdgpu_dpm_send_hbm_bad_pages_num(adev,
> >>> +con->eeprom_control.ras_num_recs);
> >>> }
> >>>
> >>> amdgpu_ras_reset_gpu(adev);
> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> >>> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> >>> index deae12dc777d..329a4c89f1e6 100644
> >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> >>> @@ -222,7 +222,7 @@ void
> >>> kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
> >>>
> >>> len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
> >>> KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
> >>> - atomic64_read(&dev->adev->smu.throttle_int_counter));
> >>> + amdgpu_dpm_get_thermal_throttling_counter(dev-
> >>> adev));
> >>>
> >>> add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,
> >> fifo_in, len);
> >>> }
> >>> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >>> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >>> index 5c0867ebcfce..2e295facd086 100644
> >>> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >>> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >>> @@ -26,6 +26,10 @@
> >>>
> >>> extern const struct amdgpu_ip_block_version pp_smu_ip_block;
> >>>
> >>> +enum smu_event_type {
> >>> + SMU_EVENT_RESET_COMPLETE = 0,
> >>> +};
> >>> +
> >>> struct amd_vce_state {
> >>> /* vce clocks */
> >>> u32 evclk;
> >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> index 08362d506534..9b332c8a0079 100644
> >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> @@ -1614,3 +1614,98 @@ int amdgpu_pm_load_smu_firmware(struct
> >>> amdgpu_device *adev, uint32_t *smu_versio
> >>>
> >>> return 0;
> >>> }
> >>> +
> >>> +int amdgpu_dpm_set_light_sbr(struct amdgpu_device *adev, bool
> >> enable)
> >>> +{
> >>> + return smu_set_light_sbr(&adev->smu, enable); }
> >>> +
> >>> +int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device
> >> *adev,
> >>> +uint32_t size) {
> >>> + int ret = 0;
> >>> +
> >>> + if (adev->smu.ppt_funcs && adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num)
> >>> + ret = adev->smu.ppt_funcs-
> >>> send_hbm_bad_pages_num(&adev->smu,
> >>> +size);
> >>> +
> >>> + return ret;
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
> >>> + enum pp_clock_type type,
> >>> + uint32_t *min,
> >>> + uint32_t *max)
> >>> +{
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + switch (type) {
> >>> + case PP_SCLK:
> >>> + return smu_get_dpm_freq_range(&adev->smu, SMU_SCLK,
> >> min, max);
> >>> + default:
> >>> + return -EINVAL;
> >>> + }
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
> >>> + enum pp_clock_type type,
> >>> + uint32_t min,
> >>> + uint32_t max)
> >>> +{
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + switch (type) {
> >>> + case PP_SCLK:
> >>> + return smu_set_soft_freq_range(&adev->smu, SMU_SCLK,
> >> min, max);
> >>> + default:
> >>> + return -EINVAL;
> >>> + }
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev,
> >>> + enum smu_event_type event,
> >>> + uint64_t event_arg)
> >>> +{
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + return smu_wait_for_event(&adev->smu, event, event_arg); }
> >>> +
> >>> +int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >> uint32_t
> >>> +*value) {
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + return smu_get_status_gfxoff(&adev->smu, value); }
> >>> +
> >>> +uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
> >>> +amdgpu_device *adev) {
> >>> + return atomic64_read(&adev->smu.throttle_int_counter);
> >>> +}
> >>> +
> >>> +/* amdgpu_dpm_gfx_state_change - Handle gfx power state change
> set
> >>> + * @adev: amdgpu_device pointer
> >>> + * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2
> >>> +-sGpuChangeState_D3Entry)
> >>> + *
> >>> + */
> >>> +void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> >>> + enum gfx_change_state state)
> >>> +{
> >>> + mutex_lock(&adev->pm.mutex);
> >>> + if (adev->powerplay.pp_funcs &&
> >>> + adev->powerplay.pp_funcs->gfx_state_change_set)
> >>> + ((adev)->powerplay.pp_funcs->gfx_state_change_set(
> >>> + (adev)->powerplay.pp_handle, state));
> >>> + mutex_unlock(&adev->pm.mutex);
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_ecc_info(struct amdgpu_device *adev,
> >>> + void *umc_ecc)
> >>> +{
> >>> + if (!is_support_sw_smu(adev))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>
> >> In general, I don't think we need to keep this check everywhere to
> >> make
> >> amdgpu_dpm_* backwards compatible. The usage is also inconsistent.
> >> For
> >> ex: amdgpu_dpm_get_thermal_throttling_counter doesn't have any
> >> is_support_sw_smu check whereas amdgpu_dpm_get_ecc_info() has it.
> >> There is no reason to keep adding is_support_sw_smu() check for every
> >> new public API. For sure, they are not going to work with powerplay
> subsystem.
> >>
> >> I would rather prefer to leave old things and create amdgpu_smu_* for
> >> anything which is supported only in smu subsystem. It's easier to
> >> read from code perspective also - separate the ones which is
> >> supported by smu component and not supported in older powerplay
> components.
> >>
> >> Only for the common ones that are supported in powerplay and smu,
> >> keep amdgpu_dpm_*, for others preference would be to keep
> amdgpu_smu_*.
> > [Quan, Evan] I get your point. However, then it will bring back the problem
> we are trying to avoid.
> > That is the caller need to know whether the amdgpu_smu_* can be used.
> > They need to know whether the swsmu framework is supported on some
> > ASIC. >
>
> swsmu has been for sometime. I'm suggesting to move away from
> amdgpu_dpm_* which is the legacy interface. There is no need to add new
> dpm_* APIs which are supported only in swsmu component. We only need
> to maintain the existing usage of dpm_*.
>
> For the newer ones, let us move to component based APIs like
> amdgpu_smu_*. All the clients of swsmu are part of this component based
> architecture and they need to be aware of services of swsmu. It is similar to
> what is followed in other components like amdgpu_gmc_*,
> amdgpu_vcn* etc.
[Quan, Evan] I'm open to the idea that move all swsmu based APIs to amdgpu_smu_*.
Maybe we can list that as a TODO in the further optimizations.
What I worried is the case below:
1. For gfx v9, Arcturus and Aldebaran support swsmu while other ASICs not.
2. Then for any swsmu API called from gfx_v9_0.c, there will be a "if (adev->asic_type == CHIP_ARCTURUS)" or similar guard needed. That will need the user know the details above.
For this patch, can we just stick to original amdgpu_dpm_* way? As I really do not want to involve too many changes.
BR
Evan
>
> Thanks,
> Lijo
>
> > And yes, there is some inconsistent cases existing in current power code.
> Maybe we can create new patche(s) to fix them?
> > For this patch series, I would like to avoid any real code logic change.
> >
> > BR
> > Evan
> >>
> >> Thanks,
> >> Lijo
> >>
> >>> + return smu_get_ecc_info(&adev->smu, umc_ecc); }
> >>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> index 16e3f72d31b9..7289d379a9fb 100644
> >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> @@ -23,6 +23,12 @@
> >>> #ifndef __AMDGPU_DPM_H__
> >>> #define __AMDGPU_DPM_H__
> >>>
> >>> +/* Argument for PPSMC_MSG_GpuChangeState */ enum
> >> gfx_change_state {
> >>> + sGpuChangeState_D0Entry = 1,
> >>> + sGpuChangeState_D3Entry,
> >>> +};
> >>> +
> >>> enum amdgpu_int_thermal_type {
> >>> THERMAL_TYPE_NONE,
> >>> THERMAL_TYPE_EXTERNAL,
> >>> @@ -574,5 +580,22 @@ void amdgpu_dpm_enable_vce(struct
> >> amdgpu_device *adev, bool enable);
> >>> void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool
> >> enable);
> >>> void amdgpu_pm_print_power_states(struct amdgpu_device *adev);
> >>> int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev,
> >> uint32_t
> >>> *smu_version);
> >>> -
> >>> +int amdgpu_dpm_set_light_sbr(struct amdgpu_device *adev, bool
> >>> +enable); int amdgpu_dpm_send_hbm_bad_pages_num(struct
> >> amdgpu_device
> >>> +*adev, uint32_t size); int amdgpu_dpm_get_dpm_freq_range(struct
> >> amdgpu_device *adev,
> >>> + enum pp_clock_type type,
> >>> + uint32_t *min,
> >>> + uint32_t *max);
> >>> +int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
> >>> + enum pp_clock_type type,
> >>> + uint32_t min,
> >>> + uint32_t max);
> >>> +int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> >> smu_event_type event,
> >>> + uint64_t event_arg);
> >>> +int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >> uint32_t
> >>> +*value); uint64_t
> amdgpu_dpm_get_thermal_throttling_counter(struct
> >>> +amdgpu_device *adev); void amdgpu_dpm_gfx_state_change(struct
> >> amdgpu_device *adev,
> >>> + enum gfx_change_state state);
> >>> +int amdgpu_dpm_get_ecc_info(struct amdgpu_device *adev,
> >>> + void *umc_ecc);
> >>> #endif
> >>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> >>> index f738f7dc20c9..29791bb21fba 100644
> >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
> >>> @@ -241,11 +241,6 @@ struct smu_user_dpm_profile {
> >>> uint32_t clk_dependency;
> >>> };
> >>>
> >>> -enum smu_event_type {
> >>> -
> >>> - SMU_EVENT_RESET_COMPLETE = 0,
> >>> -};
> >>> -
> >>> #define SMU_TABLE_INIT(tables, table_id, s, a, d) \
> >>> do { \
> >>> tables[table_id].size = s; \
> >>> @@ -1412,11 +1407,11 @@ int smu_set_ac_dc(struct smu_context
> *smu);
> >>>
> >>> int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> >>>
> >>> -int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> >>> *value);
> >>> +int smu_get_status_gfxoff(struct smu_context *smu, uint32_t
> >>> +*value);
> >>>
> >>> int smu_set_light_sbr(struct smu_context *smu, bool enable);
> >>>
> >>> -int smu_wait_for_event(struct amdgpu_device *adev, enum
> >>> smu_event_type event,
> >>> +int smu_wait_for_event(struct smu_context *smu, enum
> >> smu_event_type
> >>> +event,
> >>> uint64_t event_arg);
> >>> int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
> >>> int smu_stb_collect_info(struct smu_context *smu, void *buff,
> >>> uint32_t size); diff --git
> >> a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> index 5839918cb574..ef7d0e377965 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> @@ -100,17 +100,14 @@ static int smu_sys_set_pp_feature_mask(void
> >> *handle,
> >>> return ret;
> >>> }
> >>>
> >>> -int smu_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> >>> *value)
> >>> +int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> >>> {
> >>> - int ret = 0;
> >>> - struct smu_context *smu = &adev->smu;
> >>> + if (!smu->ppt_funcs->get_gfx_off_status)
> >>> + return -EINVAL;
> >>>
> >>> - if (is_support_sw_smu(adev) && smu->ppt_funcs-
> >>> get_gfx_off_status)
> >>> - *value = smu_get_gfx_off_status(smu);
> >>> - else
> >>> - ret = -EINVAL;
> >>> + *value = smu_get_gfx_off_status(smu);
> >>>
> >>> - return ret;
> >>> + return 0;
> >>> }
> >>>
> >>> int smu_set_soft_freq_range(struct smu_context *smu, @@ -3167,11
> >>> +3164,10 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
> >>> .get_smu_prv_buf_details = smu_get_prv_buffer_details,
> >>> };
> >>>
> >>> -int smu_wait_for_event(struct amdgpu_device *adev, enum
> >>> smu_event_type event,
> >>> +int smu_wait_for_event(struct smu_context *smu, enum
> >> smu_event_type
> >>> +event,
> >>> uint64_t event_arg)
> >>> {
> >>> int ret = -EINVAL;
> >>> - struct smu_context *smu = &adev->smu;
> >>>
> >>> if (smu->ppt_funcs->wait_for_event) {
> >>> mutex_lock(&smu->mutex);
> >>>
More information about the amd-gfx
mailing list