[PATCH 4/5] drm/amdgpu: revert context to stop engine before mode2 reset
Andrey Grodzovsky
andrey.grodzovsky at amd.com
Mon Jul 25 21:19:32 UTC 2022
On 2022-07-22 03:34, Victor Zhao wrote:
> For some hangs caused by slow tests, the engine cannot be stopped, which
> may cause a resume failure after reset. In this case, force the engine
> to halt by reverting the context addresses.
Can you explain in a bit more detail what exactly you mean by a slow test, and
why the engine cannot be stopped in this case?
Andrey
>
> Signed-off-by: Victor Zhao <Victor.Zhao at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 36 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 2 ++
> 4 files changed, 40 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5498fda8617f..833dc5e224d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5037,6 +5037,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>
> /* set guilty */
> drm_sched_increase_karma(s_job);
> + amdgpu_reset_prepare_hwcontext(adev, reset_context);
> retry:
> /* do hw reset */
> if (amdgpu_sriov_vf(adev)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
> index f8036f2b100e..c7b44aeb671b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
> @@ -37,6 +37,7 @@ struct amdgpu_gfxhub_funcs {
> void (*utcl2_harvest)(struct amdgpu_device *adev);
> void (*mode2_save_regs)(struct amdgpu_device *adev);
> void (*mode2_restore_regs)(struct amdgpu_device *adev);
> + void (*halt)(struct amdgpu_device *adev);
> };
>
> struct amdgpu_gfxhub {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> index 51cf8acd2d79..8cf53e039c11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> @@ -646,6 +646,41 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
> WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
> }
>
> +static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
> +{
> + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
> + int i;
> + uint32_t tmp;
> + int time = 1000;
> +
> + gfxhub_v2_1_set_fault_enable_default(adev, false);
> +
> + for (i = 0; i <= 14; i++) {
> + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
> + i * hub->ctx_addr_distance, ~0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
> + i * hub->ctx_addr_distance, ~0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
> + i * hub->ctx_addr_distance,
> + 0);
> + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
> + i * hub->ctx_addr_distance,
> + 0);
> + }
> + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
> + while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK |
> + GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 &&
> + time) {
> + udelay(100);
> + time--;
> + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
> + }
> +
> + if (!time) {
> + DRM_WARN("failed to wait for GRBM(EA) idle\n");
> + }
> +}
> +
> const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
> .get_fb_location = gfxhub_v2_1_get_fb_location,
> .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset,
> @@ -658,4 +693,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
> .utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
> .mode2_save_regs = gfxhub_v2_1_save_regs,
> .mode2_restore_regs = gfxhub_v2_1_restore_regs,
> + .halt = gfxhub_v2_1_halt,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
> index 51a5b68f77d3..fead7251292f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
> @@ -97,6 +97,8 @@ sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
> if (!amdgpu_sriov_vf(adev)) {
> if (adev->gfxhub.funcs->mode2_save_regs)
> adev->gfxhub.funcs->mode2_save_regs(adev);
> + if (adev->gfxhub.funcs->halt)
> + adev->gfxhub.funcs->halt(adev);
> r = sienna_cichlid_mode2_suspend_ip(adev);
> }
>
More information about the amd-gfx
mailing list