[PATCH 068/159] drm/amdgpu:add smu mode1/2 support for aldebaran
Alex Deucher
alexander.deucher at amd.com
Wed Feb 24 22:17:28 UTC 2021
From: Feifei Xu <Feifei.Xu at amd.com>
Use MSG_GfxDriverReset for mode reset and retire MSG_Mode1Reset.
Centralize soc15_asic_mode1_reset() and nv_asic_mode1_reset()functions.
Add mode2_reset_is_support() for smu->ppt_funcs.
Signed-off-by: Feifei Xu <Feifei.Xu at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39 +++++++++++++
drivers/gpu/drm/amd/amdgpu/nv.c | 40 +------------
drivers/gpu/drm/amd/amdgpu/soc15.c | 56 +++++++------------
drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h | 25 +++++++--
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 5 ++
drivers/gpu/drm/amd/pm/inc/smu_types.h | 1 +
drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 1 +
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 20 +++++++
.../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 32 ++++++++++-
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 26 ++++++++-
11 files changed, 164 insertions(+), 82 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0035c91943f6..0b3913d2f568 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1245,6 +1245,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
bool amdgpu_device_supports_atpx(struct drm_device *dev);
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
bool amdgpu_device_supports_boco(struct drm_device *dev);
bool amdgpu_device_supports_baco(struct drm_device *dev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8cb9ac385840..cdec53bfead0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4241,6 +4241,45 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
return false;
}
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ dev_info(adev->dev, "GPU mode1 reset\n");
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+ return ret;
+}
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 160fa5f59805..3cbe51108256 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -304,44 +304,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-static int nv_asic_mode1_reset(struct amdgpu_device *adev)
-{
- u32 i;
- int ret = 0;
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-
- /* disable BM */
- pci_clear_master(adev->pdev);
-
- amdgpu_device_cache_pci_state(adev->pdev);
-
- if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
- dev_info(adev->dev, "GPU smu mode1 reset\n");
- ret = amdgpu_dpm_mode1_reset(adev);
- } else {
- dev_info(adev->dev, "GPU psp mode1 reset\n");
- ret = psp_gpu_reset(adev);
- }
-
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
- amdgpu_device_load_pci_state(adev->pdev);
-
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
-
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-
- return ret;
-}
-
static int nv_asic_mode2_reset(struct amdgpu_device *adev)
{
u32 i;
@@ -444,7 +406,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
break;
default:
dev_info(adev->dev, "MODE1 reset\n");
- ret = nv_asic_mode1_reset(adev);
+ ret = amdgpu_device_mode1_reset(adev);
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index c45904b4ae23..275408c178f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -420,40 +420,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
}
-static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
-{
- u32 i;
- int ret = 0;
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-
- dev_info(adev->dev, "GPU mode1 reset\n");
-
- /* disable BM */
- pci_clear_master(adev->pdev);
-
- amdgpu_device_cache_pci_state(adev->pdev);
-
- ret = psp_gpu_reset(adev);
- if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
-
- amdgpu_device_load_pci_state(adev->pdev);
-
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio.funcs->get_memsize(adev);
-
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-
- return ret;
-}
-
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -478,13 +444,21 @@ static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
bool baco_reset = false;
+ bool connected_to_cpu = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)
+ connected_to_cpu = true;
+
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
- amdgpu_reset_method == AMD_RESET_METHOD_PCI)
- return amdgpu_reset_method;
+ amdgpu_reset_method == AMD_RESET_METHOD_PCI) {
+ /* If connected to cpu, driver only support mode2 */
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
+ return amdgpu_reset_method;
+ }
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
@@ -510,6 +484,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
baco_reset = false;
break;
+ case CHIP_ALDEBARAN:
+ /*
+ * 1.connected to cpu: driver issue mode2 reset
+ * 2.discret gpu: driver issue mode1 reset
+ */
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
+ break;
default:
break;
}
@@ -539,7 +521,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
return amdgpu_dpm_mode2_reset(adev);
default:
dev_info(adev->dev, "MODE1 reset\n");
- return soc15_asic_mode1_reset(adev);
+ return amdgpu_device_mode1_reset(adev);
}
}
diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h
index 302888376c7c..433dd1e9ec4f 100644
--- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h
@@ -36,7 +36,7 @@
// Message Definitions:
#define PPSMC_MSG_TestMessage 0x1
#define PPSMC_MSG_GetSmuVersion 0x2
-#define PPSMC_MSG_Mode1Reset 0x3
+#define PPSMC_MSG_GfxDriverReset 0x3
#define PPSMC_MSG_GetDriverIfVersion 0x4
#define PPSMC_MSG_spare1 0x5
#define PPSMC_MSG_spare2 0x6
@@ -70,8 +70,8 @@
#define PPSMC_MSG_SetPptLimit 0x22
#define PPSMC_MSG_GetPptLimit 0x23
#define PPSMC_MSG_PrepareMp1ForUnload 0x24
-#define PPSMC_MSG_PrepareMp1ForReset 0x25
-#define PPSMC_MSG_SoftReset 0x26
+#define PPSMC_MSG_PrepareMp1ForReset 0x25 //retired in 68.07
+#define PPSMC_MSG_SoftReset 0x26 //retired in 68.07
#define PPSMC_MSG_RunDcBtc 0x27
#define PPSMC_MSG_DramLogSetDramAddrHigh 0x28
#define PPSMC_MSG_DramLogSetDramAddrLow 0x29
@@ -92,7 +92,24 @@
#define PPSMC_MSG_DisableDeterminism 0x3A
#define PPSMC_MSG_SetUclkDpmMode 0x3B
-#define PPSMC_Message_Count 0x3C
+//STB to dram log
+#define PPSMC_MSG_DumpSTBtoDram 0x3C
+#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3D
+#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3E
+#define PPSMC_MSG_STBtoDramLogSetDramSize 0x3F
+#define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrHigh 0x40
+#define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41
+
+#define PPSMC_Message_Count 0x42
+
+//PPSMC Reset Types
+#define PPSMC_RESET_TYPE_WARM_RESET 0x00
+#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x01 //driver msg argument should be 1 for mode-1
+#define PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET 0x02 //and 2 for mode-2
+#define PPSMC_RESET_TYPE_PCIE_LINK_RESET 0x03
+#define PPSMC_RESET_TYPE_BIF_LINK_RESET 0x04
+#define PPSMC_RESET_TYPE_PF0_FLR_RESET 0x05
+
typedef enum {
GFXOFF_ERROR_NO_ERROR,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index a78c8c9220fd..8ad87ddb342c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1044,6 +1044,10 @@ struct pptable_funcs {
* @mode1_reset_is_support: Check if GPU supports mode1 reset.
*/
bool (*mode1_reset_is_support)(struct smu_context *smu);
+ /**
+ * @mode2_reset_is_support: Check if GPU supports mode2 reset.
+ */
+ bool (*mode2_reset_is_support)(struct smu_context *smu);
/**
* @mode1_reset: Perform mode1 reset.
@@ -1281,6 +1285,7 @@ int smu_baco_enter(struct smu_context *smu);
int smu_baco_exit(struct smu_context *smu);
bool smu_mode1_reset_is_support(struct smu_context *smu);
+bool smu_mode2_reset_is_support(struct smu_context *smu);
int smu_mode1_reset(struct smu_context *smu);
int smu_mode2_reset(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h
index e9a0bda98fd7..207d5d923c9e 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h
@@ -184,6 +184,7 @@
__SMU_DUMMY_MAP(GET_UMC_FW_WA), \
__SMU_DUMMY_MAP(Mode1Reset), \
__SMU_DUMMY_MAP(RlcPowerNotify), \
+ __SMU_DUMMY_MAP(GfxDriverReset), \
__SMU_DUMMY_MAP(SetHardMinIspiclkByFreq), \
__SMU_DUMMY_MAP(SetHardMinIspxclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMinSocclkByFreq), \
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index 699b656bbd71..ef9dad9a51ff 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -220,6 +220,7 @@ int smu_v13_0_baco_enter(struct smu_context *smu);
int smu_v13_0_baco_exit(struct smu_context *smu);
int smu_v13_0_mode1_reset(struct smu_context *smu);
+int smu_v13_0_mode2_reset(struct smu_context *smu);
int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
uint32_t *min, uint32_t *max);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b1c30a6cb16e..cf81b2ac2232 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1819,6 +1819,9 @@ int smu_set_mp1_state(struct smu_context *smu,
msg = SMU_MSG_PrepareMp1ForUnload;
break;
case PP_MP1_STATE_RESET:
+ /*TODO: since the SMU_MSG_PrepareMp1ForReset is retired in Aldebaran
+ * Add handling here forAldebaran.
+ */
msg = SMU_MSG_PrepareMp1ForReset;
break;
case PP_MP1_STATE_NONE:
@@ -2581,6 +2584,23 @@ bool smu_mode1_reset_is_support(struct smu_context *smu)
return ret;
}
+bool smu_mode2_reset_is_support(struct smu_context *smu)
+{
+ bool ret = false;
+
+ if (!smu->pm_enabled)
+ return false;
+
+ mutex_lock(&smu->mutex);
+
+ if (smu->ppt_funcs && smu->ppt_funcs->mode2_reset_is_support)
+ ret = smu->ppt_funcs->mode2_reset_is_support(smu);
+
+ mutex_unlock(&smu->mutex);
+
+ return ret;
+}
+
int smu_mode1_reset(struct smu_context *smu)
{
int ret = 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index bdefb9078847..475bd5aff6c9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -45,6 +45,7 @@
#include <linux/pci.h>
#include "amdgpu_ras.h"
#include "smu_cmn.h"
+#include "mp/mp_13_0_2_offset.h"
/*
* DO NOT use these for err/warn/info/debug messages.
@@ -108,7 +109,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
MSG_MAP(PrepareMp1ForReset, PPSMC_MSG_PrepareMp1ForReset, 0),
- MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
+ MSG_MAP(GfxDriverReset, PPSMC_MSG_GfxDriverReset, 0),
MSG_MAP(SoftReset, PPSMC_MSG_SoftReset, 0),
MSG_MAP(RunDcBtc, PPSMC_MSG_RunDcBtc, 0),
MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
@@ -1250,6 +1251,31 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
return sizeof(struct gpu_metrics_v1_0);
}
+static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ u32 smu_version;
+ uint32_t val;
+ /**
+ * PM FW version support mode1 reset from 68.07
+ */
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if ((smu_version < 0x00440700))
+ return false;
+ /**
+ * mode1 reset relies on PSP, so we should check if
+ * PSP is alive.
+ */
+ val = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return val != 0x0;
+}
+
+static bool aldebaran_is_mode2_reset_supported(struct smu_context *smu)
+{
+ return true;
+}
+
static const struct pptable_funcs aldebaran_ppt_funcs = {
/* init dpm */
.get_allowed_feature_mask = aldebaran_get_allowed_feature_mask,
@@ -1305,6 +1331,10 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.get_gpu_metrics = aldebaran_get_gpu_metrics,
+ .mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
+ .mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
+ .mode1_reset = smu_v13_0_mode1_reset,
+ .mode2_reset = smu_v13_0_mode2_reset,
};
void aldebaran_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 20fff2fda13f..15033caeacb7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1349,15 +1349,39 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu)
int smu_v13_0_mode1_reset(struct smu_context *smu)
{
+ u32 smu_version;
int ret = 0;
+ /*
+ * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
+ */
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if (smu_version < 0x00440700)
+ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+ else
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
- ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
if (!ret)
msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
return ret;
}
+int smu_v13_0_mode2_reset(struct smu_context *smu)
+{
+ u32 smu_version;
+ int ret = 0;
+ struct amdgpu_device *adev = smu->adev;
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if (smu_version >= 0x00440700)
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_2, NULL);
+ else
+ dev_err(adev->dev, "smu fw 0x%x does not support MSG_GfxDeviceDriverReset MSG\n", smu_version);
+ /*TODO: mode2 reset wait time should be shorter, will modify it later*/
+ if (!ret)
+ msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
+ return ret;
+}
+
int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
uint32_t *min, uint32_t *max)
{
--
2.29.2
More information about the amd-gfx
mailing list