[PATCH v2] drm/amdgpu: fix the ib test hang when gfx is in "idle" state

Alex Deucher alexdeucher at gmail.com
Tue Apr 24 15:36:23 UTC 2018


On Mon, Apr 23, 2018 at 10:49 PM, Huang Rui <ray.huang at amd.com> wrote:
> "aaabaf4   drm/amdgpu: defer test IBs on the rings at boot (V3)"
> Above patch defers the execution of gfx/compute ib tests. However, at that time,
> the gfx may already go into idle state. If "idle" gfx receives command
> submission, then will get hang in the system. And it still has issue to
> dynamically enable/disable gfxoff at runtime. So we have to use a workaround to
> skip the gfx/compute ib tests when gfx is already in "idle" state.

I still don't understand why we need this patch.  If the IB test won't
work with gfxoff enabled, I don't see how an arbitrary IB from
userspace would work.  They are the same thing.  Do the libdrm unit
tests pass with gfxoff enabled?  Those mostly don't do any draws or
dispatches.  Is the idea that we need to manually power up/down gfx
similar to how we support UVD or VCE power gating?

Alex

>
> Signed-off-by: Huang Rui <ray.huang at amd.com>
> Cc: Shirish S <shirish.s at amd.com>
> ---
>
> Changes from V1 -> V2:
> - Remove unused definitions of smu10_hwmgr.
> - Add WA descriptions into commit log and comments.
>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  2 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 23 ++++++++++++++++++++++-
>  drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 22 ++--------------------
>  3 files changed, 26 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 59df4b7..a0263b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -905,6 +905,7 @@ struct amdgpu_gfx_funcs {
>         void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
>         void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
>         void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
> +       bool (*is_gfx_on)(struct amdgpu_device *adev);
>  };
>
>  struct amdgpu_ngg_buf {
> @@ -1855,6 +1856,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>  #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
>  #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
>  #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
> +#define amdgpu_gfx_is_gfx_on(adev) (adev)->gfx.funcs->is_gfx_on((adev))
>
>  /* Common functions */
>  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 2c5e2a4..b8bd194 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -342,6 +342,18 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
>         return r;
>  }
>
> +static bool gfx_v9_0_is_gfx_on(struct amdgpu_device *adev)
> +{
> +       uint32_t reg;
> +
> +       reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> +       if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> +           (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> +               return true;
> +
> +       return false;
> +}
> +
>  static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -353,6 +365,14 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>         uint32_t tmp;
>         long r;
>
> +       /*
> +        * FIXME: It still has issue to dynamically enable/disable gfxoff at
> +        * runtime. So it has to skip the gfx/compute ib test when gfx is
> +        * already in "idle" state.
> +        */
> +       if (!amdgpu_gfx_is_gfx_on(adev))
> +               return 0;
> +
>         r = amdgpu_device_wb_get(adev, &index);
>         if (r) {
>                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
> @@ -1085,7 +1105,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
>         .read_wave_data = &gfx_v9_0_read_wave_data,
>         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
>         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
> -       .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
> +       .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
> +       .is_gfx_on = &gfx_v9_0_is_gfx_on
>  };
>
>  static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> index 7712eb6..48c17fb 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
> @@ -42,12 +42,6 @@
>  #define SMU10_DISPCLK_BYPASS_THRESHOLD     10000 /* 100Mhz */
>  #define SMC_RAM_END                     0x40000
>
> -#define mmPWR_MISC_CNTL_STATUS                                 0x0183
> -#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                                0
> -#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT       0x0
> -#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT         0x1
> -#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK         0x00000001L
> -#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK           0x00000006L
>
>  static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;
>
> @@ -254,28 +248,16 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
>         return smu10_reset_cc6_data(hwmgr);
>  }
>
> -static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
> -{
> -       uint32_t reg;
> -       struct amdgpu_device *adev = hwmgr->adev;
> -
> -       reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
> -       if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
> -           (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
> -               return true;
> -
> -       return false;
> -}
> -
>  static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
>  {
>         struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
> +       struct amdgpu_device *adev = hwmgr->adev;
>
>         if (smu10_data->gfx_off_controled_by_driver) {
>                 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
>
>                 /* confirm gfx is back to "on" state */
> -               while (!smu10_is_gfx_on(hwmgr))
> +               while (!amdgpu_gfx_is_gfx_on(adev))
>                         msleep(1);
>         }
>
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list