[PATCH v2 5/5] drm/etnaviv: take current primitive into account when checking for hung GPU

Christian Gmeiner christian.gmeiner at gmail.com
Sun Jul 7 20:42:20 UTC 2024


>
> Large draws can make the GPU appear stuck to the current hangcheck logic,
> because the FE address does not move until the draw has finished. However,
> the FE has a debug register that records the current primitive ID within
> a draw. Using this debug register, we can keep extending the timeout for as
> long as the draw progresses.
>
> Signed-off-by: Lucas Stach <l.stach at pengutronix.de>

Reviewed-by: Christian Gmeiner <cgmeiner at igalia.com>

> ---
> v2:
> - dropped debug register enable/disable
> - locked against concurrent debug register access
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h   |  1 +
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 17 +++++++++++++++--
>  2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index 31322195b9e4..4d8a7d48ade3 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -144,6 +144,7 @@ struct etnaviv_gpu {
>
>         /* hang detection */
>         u32 hangcheck_dma_addr;
> +       u32 hangcheck_primid;
>         u32 hangcheck_fence;
>
>         void __iomem *mmio;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index 62dcfdc7894d..01f927430bc6 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -11,6 +11,7 @@
>  #include "etnaviv_gpu.h"
>  #include "etnaviv_sched.h"
>  #include "state.xml.h"
> +#include "state_hi.xml.h"
>
>  static int etnaviv_job_hang_limit = 0;
>  module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444);
> @@ -35,7 +36,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
>  {
>         struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
>         struct etnaviv_gpu *gpu = submit->gpu;
> -       u32 dma_addr;
> +       u32 dma_addr, primid = 0;
>         int change;
>
>         /*
> @@ -52,10 +53,22 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
>          */
>         dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
>         change = dma_addr - gpu->hangcheck_dma_addr;
> +       if (submit->exec_state == ETNA_PIPE_3D) {
> +               /* guard against concurrent usage from perfmon_sample */
> +               mutex_lock(&gpu->lock);
> +               gpu_write(gpu, VIVS_MC_PROFILE_CONFIG0,
> +                         VIVS_MC_PROFILE_CONFIG0_FE_CURRENT_PRIM <<
> +                         VIVS_MC_PROFILE_CONFIG0_FE__SHIFT);
> +               primid = gpu_read(gpu, VIVS_MC_PROFILE_FE_READ);
> +               mutex_unlock(&gpu->lock);
> +       }
>         if (gpu->state == ETNA_GPU_STATE_RUNNING &&
>             (gpu->completed_fence != gpu->hangcheck_fence ||
> -            change < 0 || change > 16)) {
> +            change < 0 || change > 16 ||
> +            (submit->exec_state == ETNA_PIPE_3D &&
> +             gpu->hangcheck_primid != primid))) {
>                 gpu->hangcheck_dma_addr = dma_addr;
> +               gpu->hangcheck_primid = primid;
>                 gpu->hangcheck_fence = gpu->completed_fence;
>                 goto out_no_timeout;
>         }
> --
> 2.39.2
>


-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info/privacypolicy


More information about the dri-devel mailing list