[PATCH V3 4/7] drm/amd/pm: correct the usage for 'supported' member of smu_feature structure
Nathan Chancellor
nathan at kernel.org
Tue Feb 8 18:24:29 UTC 2022
Hi Evan,
On Fri, Jan 28, 2022 at 03:04:52PM +0800, Evan Quan wrote:
> The supported features should be retrieved just after the EnableAllDpmFeatures message
> completes. And the check (whether some dpm feature is supported) is only needed when we
> decide to enable or disable it.
>
> Signed-off-by: Evan Quan <evan.quan at amd.com>
> Change-Id: I07c9a5ac5290cd0d88a40ce1768d393156419b5a
> ---
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 11 +++++++++++
> drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 8 ++++----
> .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 10 +++++-----
> drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 3 ---
> drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 5 +----
> drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 ---
> drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 3 ---
> 7 files changed, 21 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index ae48cc5aa567..803068cb5079 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -1057,8 +1057,10 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu)
>
> static int smu_smc_hw_setup(struct smu_context *smu)
> {
> + struct smu_feature *feature = &smu->smu_feature;
> struct amdgpu_device *adev = smu->adev;
> uint32_t pcie_gen = 0, pcie_width = 0;
> + uint64_t features_supported;
> int ret = 0;
>
> if (adev->in_suspend && smu_is_dpm_running(smu)) {
> @@ -1138,6 +1140,15 @@ static int smu_smc_hw_setup(struct smu_context *smu)
> return ret;
> }
>
> + ret = smu_feature_get_enabled_mask(smu, &features_supported);
> + if (ret) {
> + dev_err(adev->dev, "Failed to retrieve supported dpm features!\n");
> + return ret;
> + }
> + bitmap_copy(feature->supported,
> + (unsigned long *)&features_supported,
> + feature->feature_num);
> +
> if (!smu_is_dpm_running(smu))
> dev_info(adev->dev, "dpm has been disabled\n");
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 84cbde3f913d..f55ead5f9aba 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -1624,8 +1624,8 @@ static int navi10_display_config_changed(struct smu_context *smu)
> int ret = 0;
>
> if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
> - smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
> - smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
> + smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
> + smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
> ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays,
> smu->display_config->num_display,
> NULL);
> @@ -1860,13 +1860,13 @@ static int navi10_notify_smc_display_config(struct smu_context *smu)
> min_clocks.dcef_clock_in_sr = smu->display_config->min_dcef_deep_sleep_set_clk;
> min_clocks.memory_clock = smu->display_config->min_mem_set_clock;
>
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
> clock_req.clock_type = amd_pp_dcef_clock;
> clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10;
>
> ret = smu_v11_0_display_clock_voltage_request(smu, &clock_req);
> if (!ret) {
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
> ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetMinDeepSleepDcefclk,
> min_clocks.dcef_clock_in_sr/100,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> index b6759f8b5167..804e1c98238d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> @@ -1280,8 +1280,8 @@ static int sienna_cichlid_display_config_changed(struct smu_context *smu)
> int ret = 0;
>
> if ((smu->watermarks_bitmap & WATERMARKS_EXIST) &&
> - smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
> - smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
> + smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) &&
> + smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) {
> #if 0
> ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays,
> smu->display_config->num_display,
> @@ -1517,13 +1517,13 @@ static int sienna_cichlid_notify_smc_display_config(struct smu_context *smu)
> min_clocks.dcef_clock_in_sr = smu->display_config->min_dcef_deep_sleep_set_clk;
> min_clocks.memory_clock = smu->display_config->min_mem_set_clock;
>
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) {
> clock_req.clock_type = amd_pp_dcef_clock;
> clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10;
>
> ret = smu_v11_0_display_clock_voltage_request(smu, &clock_req);
> if (!ret) {
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DS_DCEFCLK_BIT)) {
> ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_SetMinDeepSleepDcefclk,
> min_clocks.dcef_clock_in_sr/100,
> @@ -3785,7 +3785,7 @@ static int sienna_cichlid_gpo_control(struct smu_context *smu,
> int ret = 0;
>
>
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_GFX_GPO_BIT)) {
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFX_GPO_BIT)) {
> ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
> if (ret)
> return ret;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> index b34d3a416939..d36b64371492 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
> @@ -808,7 +808,6 @@ int smu_v11_0_system_features_control(struct smu_context *smu,
> return ret;
>
> bitmap_zero(feature->enabled, feature->feature_num);
> - bitmap_zero(feature->supported, feature->feature_num);
>
> if (en) {
> ret = smu_cmn_get_enabled_mask(smu, &feature_mask);
> @@ -817,8 +816,6 @@ int smu_v11_0_system_features_control(struct smu_context *smu,
>
> bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
> feature->feature_num);
> - bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
> - feature->feature_num);
> }
>
> return ret;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 65f30cdafc8f..478151e72889 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -1956,7 +1956,6 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
> RLC_STATUS_OFF, NULL);
>
> bitmap_zero(feature->enabled, feature->feature_num);
> - bitmap_zero(feature->supported, feature->feature_num);
>
> if (!en)
> return ret;
> @@ -1967,8 +1966,6 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
>
> bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
> feature->feature_num);
> - bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
> - feature->feature_num);
>
> return 0;
> }
> @@ -1985,7 +1982,7 @@ static int vangogh_post_smu_init(struct smu_context *smu)
> adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines;
>
> /* allow message will be sent after enable message on Vangogh*/
> - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_GFXCLK_BIT) &&
> + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) &&
> (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
> ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL);
> if (ret) {
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> index 6b77fc4b4698..92b5c1108a2e 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> @@ -774,7 +774,6 @@ int smu_v13_0_system_features_control(struct smu_context *smu,
> return ret;
>
> bitmap_zero(feature->enabled, feature->feature_num);
> - bitmap_zero(feature->supported, feature->feature_num);
>
> if (en) {
> ret = smu_cmn_get_enabled_mask(smu, &feature_mask);
> @@ -783,8 +782,6 @@ int smu_v13_0_system_features_control(struct smu_context *smu,
>
> bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
> feature->feature_num);
> - bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
> - feature->feature_num);
> }
>
> return ret;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> index a7aabb8079ed..d89e8a03651b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
> @@ -204,7 +204,6 @@ static int yellow_carp_system_features_control(struct smu_context *smu, bool en)
> ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
>
> bitmap_zero(feature->enabled, feature->feature_num);
> - bitmap_zero(feature->supported, feature->feature_num);
>
> if (!en)
> return ret;
> @@ -215,8 +214,6 @@ static int yellow_carp_system_features_control(struct smu_context *smu, bool en)
>
> bitmap_copy(feature->enabled, (unsigned long *)&feature_mask,
> feature->feature_num);
> - bitmap_copy(feature->supported, (unsigned long *)&feature_mask,
> - feature->feature_num);
>
> return 0;
> }
> --
> 2.29.0
>
>
Apologies if this has been reported or fixed already; I searched the
mailing list but did not see anything.
This change, as commit 7ade3ca9cdb5 ("drm/amd/pm: correct the usage for
'supported' member of smu_feature structure"), causes the following
errors and stack trace when booting on a Ryzen 3 4300G-based system, which
prevents me from reaching GNOME.
[ 6.125321] amdgpu 0000:0c:00.0: amdgpu: Failed to retrieve supported dpm features!
[ 6.125474] amdgpu 0000:0c:00.0: amdgpu: Failed to setup smc hw!
[ 6.125595] [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* hw_init of IP block <smu> failed -22
[ 6.125995] amdgpu 0000:0c:00.0: amdgpu: amdgpu_device_ip_init failed
[ 6.126126] amdgpu 0000:0c:00.0: amdgpu: Fatal error during GPU init
[ 6.160535] amdgpu: probe of 0000:0c:00.0 failed with error -22
[ 6.163849] ------------[ cut here ]------------
[ 6.163964] Memory manager not clean during takedown.
[ 6.164119] WARNING: CPU: 3 PID: 350 at drivers/gpu/drm/drm_mm.c:995 drm_mm_takedown+0x1f/0x30 [drm]
[ 6.164353] Modules linked in: hid_logitech_hidpp joydev mousedev rtw88_8821ce rtw88_8821c rtw88_pci rtw88_core vfat mac80211 fat btusb btrtl libarc4 btbcm btintel lzo_rle hp_wmi intel_rapl_msr cfg80211 sparse_keymap bluetooth amdgpu(+) platform_profile r8169 hid_logitech_dj ecdh_generic wmi_bmof realtek
intel_rapl_common mdio_devres libphy rfkill kvm_amd kvm irqbypass drm_ttm_helper crct10dif_pclmul ttm snd_hda_codec_realtek snd_hda_codec_hdmi crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel gpu_sched ledtrig_audio aesni_intel i2c_algo_bit snd_hda_intel crypto_simd snd_intel_dspcfg drm_kms_helper snd_hda_codec
cryptd rapl snd_hwdep syscopyarea sysfillrect snd_hda_core sysimgblt snd_pcm pcspkr fb_sys_fops sp5100_tco snd_timer k10temp ccp cec snd i2c_piix4 soundcore tpm_crb tpm_tis wmi tpm_tis_core tpm gpio_amdpt video rng_core pinctrl_amd gpio_generic acpi_tad mac_hid acpi_cpufreq usbhid pkcs8_key_parser sg crypto_user
fuse drm zram bpf_preload ip_tables x_tables ext4
[ 6.164393] crc32c_generic crc16 mbcache jbd2 xhci_pci crc32c_intel xhci_pci_renesas
[ 6.166704] CPU: 3 PID: 350 Comm: systemd-udevd Not tainted 5.16.0-rc5-debug-01621-g7ade3ca9cdb5 #1 486b1a72751606fa3f686487a0e195723dfbf5fa
[ 6.167024] Hardware name: HP HP Desktop M01-F1xxx/87D6, BIOS F.12 12/17/2020
[ 6.167207] RIP: 0010:drm_mm_takedown+0x1f/0x30 [drm]
[ 6.167359] Code: 48 e9 6a ff ff ff 0f 1f 44 00 00 0f 1f 44 00 00 48 8b 47 38 48 83 c7 38 48 39 f8 75 01 c3 48 c7 c7 b0 bc 3e c0 e8 cf 7e 4c c5 <0f> 0b c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 0f 1f 44 00 00
[ 6.167847] RSP: 0018:ffffaea74164f930 EFLAGS: 00010286
[ 6.167984] RAX: 0000000000000000 RBX: ffff8c69c43c0000 RCX: 0000000000000027
[ 6.168171] RDX: ffff8c70baae0728 RSI: 0000000000000001 RDI: ffff8c70baae0720
[ 6.168359] RBP: ffff8c69c43c5d88 R08: 0000000000000000 R09: ffffaea74164f760
[ 6.168544] R10: ffffaea74164f758 R11: ffff8c70de37ffe8 R12: ffff8c69c43c5d88
[ 6.168729] R13: ffff8c69c43c5d70 R14: ffff8c69c43c5d88 R15: ffff8c69c13b037c
[ 6.168921] FS: 00007ff533bb5a40(0000) GS:ffff8c70baac0000(0000) knlGS:0000000000000000
[ 6.169138] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6.169278] CR2: 00007f5b37c1ef10 CR3: 0000000108910000 CR4: 0000000000350ee0
[ 6.169461] Call Trace:
[ 6.169531] <TASK>
[ 6.169590] amdgpu_vram_mgr_fini+0xe0/0x150 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.170112] amdgpu_ttm_fini+0x11d/0x190 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.170553] amdgpu_bo_fini+0x2a/0x90 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.170986] gmc_v9_0_sw_fini+0x3a/0x40 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.171442] amdgpu_device_fini_sw+0xc9/0x340 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.171845] amdgpu_driver_release_kms+0x12/0x30 [amdgpu f875b0375176cd72cd7df8bb01e8902f994b9ac1]
[ 6.172252] devm_drm_dev_init_release+0x3d/0x60 [drm c0bf7c2e07a173ae66922afeaf398daf07c4aa7c]
[ 6.172497] release_nodes+0x38/0xb0
[ 6.172594] devres_release_all+0x8b/0xc0
[ 6.172701] really_probe+0x100/0x3f0
[ 6.172799] __driver_probe_device+0xfe/0x180
[ 6.172913] driver_probe_device+0x1e/0x90
[ 6.173056] __driver_attach+0xc0/0x1c0
[ 6.173159] ? __device_attach_driver+0xe0/0xe0
[ 6.173281] ? __device_attach_driver+0xe0/0xe0
[ 6.173403] bus_for_each_dev+0x89/0xd0
[ 6.173508] bus_add_driver+0x149/0x1e0
[ 6.173609] driver_register+0x8f/0xe0
[ 6.173709] ? 0xffffffffc12c6000
[ 6.173796] do_one_initcall+0x57/0x220
[ 6.173898] do_init_module+0x5c/0x270
[ 6.173999] load_module+0x258a/0x2760
[ 6.174102] ? __do_sys_init_module+0x12e/0x1b0
[ 6.174223] __do_sys_init_module+0x12e/0x1b0
[ 6.174344] do_syscall_64+0x5c/0x80
[ 6.174445] ? __vm_munmap+0x92/0x120
[ 6.174546] ? syscall_exit_to_user_mode+0x23/0x40
[ 6.174671] ? do_syscall_64+0x69/0x80
[ 6.174770] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 6.174902] RIP: 0033:0x7ff5345ab32e
[ 6.175000] Code: 48 8b 0d 45 0b 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 12 0b 0c 00 f7 d8 64 89 01 48
[ 6.175477] RSP: 002b:00007ffeccdd2918 EFLAGS: 00000246 ORIG_RAX: 00000000000000af
[ 6.175674] RAX: ffffffffffffffda RBX: 00005646206cfdd0 RCX: 00007ff5345ab32e
[ 6.175851] RDX: 00007ff5346ff32c RSI: 000000000103912e RDI: 00007ff531498010
[ 6.176032] RBP: 00007ff531498010 R08: 00007ff532a7d000 R09: 0000000000000000
[ 6.176215] R10: 00005646207e5400 R11: 0000000000000246 R12: 00007ff5346ff32c
[ 6.176443] R13: 00005646206cb670 R14: 00005646206cfdd0 R15: 0000564620660e80
[ 6.176633] </TASK>
[ 6.176694] ---[ end trace 72edbf4275a2e0d9 ]---
If there is any other information I can provide or patches I can test, I
am happy to do so.
Cheers,
Nathan
More information about the amd-gfx
mailing list