[PATCH] drm/amdgpu: Add GPU reset functionality for Vega10
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Sep 15 07:12:28 UTC 2017
Am 15.09.2017 um 08:57 schrieb Ken.Wang at amd.com:
> From: Ken Wang <Ken.Wang at amd.com>
>
> Signed-off-by: Ken Wang <Ken.Wang at amd.com>
> Change-Id: I6fd2c216a84747313f18db25a444be5ed43b4f4b
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 10 +++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 3 +++
> drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 7 ++++++
> drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 36 ++++++++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | 1 +
> drivers/gpu/drm/amd/amdgpu/soc15.c | 39 ++++++++++++++++++++----------
> 8 files changed, 89 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index fc1c5437..4196786 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2883,7 +2883,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
> int resched;
> bool need_full_reset, vram_lost = false;
>
> - if (!amdgpu_check_soft_reset(adev)) {
> + if (!amdgpu_check_soft_reset(adev) && (adev->asic_type != CHIP_VEGA10)) {
> DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
> return 0;
> }
NAK, please implement this as a check_soft_reset() callback for the PSP block.
> @@ -2910,6 +2910,10 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
>
> need_full_reset = amdgpu_need_full_reset(adev);
>
> + /* no soft reset for vega10 right now*/
> + if (adev->asic_type == CHIP_VEGA10)
> + need_full_reset = true;
> +
NAK again, please just modify amdgpu_need_full_reset() to include the PSP.
> if (!need_full_reset) {
> amdgpu_pre_soft_reset(adev);
> r = amdgpu_soft_reset(adev);
> @@ -2924,6 +2928,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
> r = amdgpu_suspend(adev);
>
> retry:
> +
Unrelated whitespace change.
> amdgpu_atombios_scratch_regs_save(adev);
> r = amdgpu_asic_reset(adev);
> amdgpu_atombios_scratch_regs_restore(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 8a1ee97..6942b89 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -62,6 +62,7 @@ static int psp_sw_init(void *handle)
> psp->cmd_submit = psp_v3_1_cmd_submit;
> psp->compare_sram_data = psp_v3_1_compare_sram_data;
> psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
> + psp->mode1_reset = psp_v3_1_mode1_reset;
> break;
> case CHIP_RAVEN:
> psp->init_microcode = psp_v10_0_init_microcode;
> @@ -72,6 +73,7 @@ static int psp_sw_init(void *handle)
> psp->ring_destroy = psp_v10_0_ring_destroy;
> psp->cmd_submit = psp_v10_0_cmd_submit;
> psp->compare_sram_data = psp_v10_0_compare_sram_data;
> + psp->mode1_reset = psp_v10_0_mode1_reset;
> break;
> default:
> return -EINVAL;
> @@ -497,6 +499,12 @@ static int psp_resume(void *handle)
> return ret;
> }
>
> +static int psp_reset(void* handle)
> +{
> + struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> + return psp_mode1_reset(&adev->psp);
> +}
> +
> static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
> enum AMDGPU_UCODE_ID ucode_type)
> {
> @@ -541,7 +549,7 @@ const struct amd_ip_funcs psp_ip_funcs = {
> .resume = psp_resume,
> .is_idle = NULL,
> .wait_for_idle = NULL,
> - .soft_reset = NULL,
> + .soft_reset = psp_reset,
> .set_clockgating_state = psp_set_clockgating_state,
> .set_powergating_state = psp_set_powergating_state,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 1b7d12d..ce465455 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -76,6 +76,7 @@ struct psp_context
> struct amdgpu_firmware_info *ucode,
> enum AMDGPU_UCODE_ID ucode_type);
> bool (*smu_reload_quirk)(struct psp_context *psp);
> + int (*mode1_reset)(struct psp_context *psp);
>
> /* fence buffer */
> struct amdgpu_bo *fw_pri_bo;
> @@ -139,6 +140,8 @@ struct amdgpu_psp_funcs {
> ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
> #define psp_smu_reload_quirk(psp) \
> ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
> +#define psp_mode1_reset(psp) \
> + ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false)
>
> extern const struct amd_ip_funcs psp_ip_funcs;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
> index 6b324da..cbc43bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
> @@ -406,3 +406,10 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp,
>
> return true;
> }
> +
> +
> +int psp_v10_0_mode1_reset(struct psp_context *psp)
> +{
> + DRM_INFO("psp mode 1 reset not supported now! \n");
> + return -EINVAL;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
> index 3af3ad1..451e830 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
> @@ -45,4 +45,6 @@ extern int psp_v10_0_cmd_submit(struct psp_context *psp,
> extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
> struct amdgpu_firmware_info *ucode,
> enum AMDGPU_UCODE_ID ucode_type);
> +
> +extern int psp_v10_0_mode1_reset(struct psp_context *psp);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> index eb73931..01a896e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> @@ -517,7 +517,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
> ucode_size -= 4;
> }
>
> - return true;
> + return true;
Ditto, unnecessary whitespace change.
> }
>
> bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
> @@ -530,3 +530,37 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
> reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2);
> return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
> }
> +
> +int psp_v3_1_mode1_reset(struct psp_context *psp)
> +{
> + int ret;
> + uint32_t offset;
> + struct amdgpu_device *adev = psp->adev;
> +
> + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
> +
> + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
> +
> + if (ret) {
> + DRM_INFO("psp is not working correctly before mode1 reset!\n");
> + return -EINVAL;
> + }
> +
> + /*send the mode 1 reset command*/
> + WREG32(offset, 0x70000);
> +
> + mdelay(1000);
> +
> + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
> +
> + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
> +
> + if (ret) {
> + DRM_INFO("psp mode 1 reset failed!\n");
> + return -EINVAL;
> + }
> +
> + DRM_INFO("psp mode1 reset succeed \n");
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
> index 5af2231..b05dbad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
> @@ -53,4 +53,5 @@ extern bool psp_v3_1_compare_sram_data(struct psp_context *psp,
> struct amdgpu_firmware_info *ucode,
> enum AMDGPU_UCODE_ID ucode_type);
> extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp);
> +extern int psp_v3_1_mode1_reset(struct psp_context *psp);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index a74d616..190be09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -407,18 +407,37 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
> return -EINVAL;
> }
>
> -static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
> +static int soc15_asic_reset(struct amdgpu_device *adev)
> {
> u32 i;
> + u32 *pci_regs;
> +
> + pci_regs = kmalloc(128 * sizeof(u32), GFP_KERNEL);
> +
> + if (pci_regs == NULL)
> + return -EINVAL;
-ENOMEM would be more appropriate.
> +
> + amdgpu_atombios_scratch_regs_engine_hung(adev, true);
>
> - dev_info(adev->dev, "GPU pci config reset\n");
> + dev_info(adev->dev, "GPU reset\n");
>
> /* disable BM */
> pci_clear_master(adev->pdev);
> - /* reset */
> - amdgpu_pci_config_reset(adev);
>
> - udelay(100);
> + for (i = 0; i < 128; i++) {
> + pci_read_config_dword(adev->pdev, i*4, &pci_regs[i]);
> + }
No "{" and "}" here.
> +
> + for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) {
> + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){
> + adev->ip_blocks[i].version->funcs->soft_reset((void *)adev);
> + break;
> + }
> + }
> +
> + for (i = 0; i < 128; i++) {
> + pci_write_config_dword(adev->pdev, i*4, pci_regs[i]);
> + }
Ditto.
BTW: Doesn't the PCI subsystem have helpers for this?
Regards,
Christian.
>
> /* wait for asic to come out of reset */
> for (i = 0; i < adev->usec_timeout; i++) {
> @@ -430,16 +449,10 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
> udelay(1);
> }
>
> -}
> -
> -static int soc15_asic_reset(struct amdgpu_device *adev)
> -{
> - amdgpu_atombios_scratch_regs_engine_hung(adev, true);
> -
> - soc15_gpu_pci_config_reset(adev);
> -
> amdgpu_atombios_scratch_regs_engine_hung(adev, false);
>
> + kfree(pci_regs);
> +
> return 0;
> }
>
More information about the amd-gfx
mailing list