[PATCH 3/4] drm/amdgpu: add reset register trace dump function

Christian König christian.koenig at amd.com
Mon Jan 24 07:15:17 UTC 2022


Am 21.01.22 um 21:34 schrieb Sharma, Shashank:
> From 1c5c552eeddaffd9fb3e7d45ece1b2b28fccc575 Mon Sep 17 00:00:00 2001
> From: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> Date: Fri, 21 Jan 2022 14:19:10 +0530
> Subject: [PATCH 3/4] drm/amdgpu: add reset register trace dump 
> function for
>  gfx_v10_0
>
> Implementation of register trace dump function on the AMD GPU resets
>
> Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  8 ++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c    | 53 ++++++++++++++++++++++-
>  drivers/gpu/drm/amd/include/amd_shared.h  |  1 +
>  3 files changed, 60 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index d855cb53c7e0..c97b53b54333 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -537,6 +537,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
>                __entry->seqno)
>  );
>
> +TRACE_EVENT(gfx_v10_0_reset_reg_dumps,
> +        TP_PROTO(char *reg_dumps),
> +        TP_ARGS(reg_dumps),
> +        TP_STRUCT__entry(__string(dumps, reg_dumps)),
> +        TP_fast_assign(__assign_str(dumps, reg_dumps);),
> +        TP_printk("amdgpu register dump {%s}", __get_str(dumps))
> +);
> +
>  #undef AMDGPU_JOB_GET_TIMELINE_NAME
>  #endif
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 16dbe593cba2..05974ed5416d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -46,7 +46,7 @@
>  #include "v10_structs.h"
>  #include "gfx_v10_0.h"
>  #include "nbio_v2_3.h"
> -
> +#include "amdgpu_trace.h"
>  /*
>   * Navi10 has two graphic rings to share each graphic pipe.
>   * 1. Primary ring
> @@ -188,6 +188,12 @@
>  #define RLCG_ERROR_REPORT_ENABLED(adev) \
>      (amdgpu_sriov_reg_indirect_mmhub(adev) || 
> amdgpu_sriov_reg_indirect_gc(adev))
>
> +#define N_REGS (17)
> +#define DUMP_REG(addr) do {                    \
> +        (dump)[i][0] = (addr);                \
> +        (dump)[i++][1] =  RREG32_SOC15_IP(GC, addr);    \
> +    } while (0)

Those macros need an AMDGPU_ prefix and maybe some better names.

 From the design POV I'm really wondering if it wouldn't be better if 
userspace defines the registers we want to dump in case of a crash.

Regards,
Christian.

> +
>  MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
>  MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
>  MODULE_FIRMWARE("amdgpu/navi10_me.bin");
> @@ -7488,7 +7494,6 @@ static int gfx_v10_0_hw_init(void *handle)
>  {
>      int r;
>      struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -
>      if (!amdgpu_emu_mode)
>          gfx_v10_0_init_golden_registers(adev);
>
> @@ -7602,6 +7607,49 @@ static int gfx_v10_0_hw_fini(void *handle)
>      return 0;
>  }
>
> +static int gfx_v10_0_reset_reg_dumps(void *handle)
> +{
> +    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +    uint32_t i = 0;
> +    uint32_t (*dump)[2], n_regs = 0;
> +    char *reg_dumps;
> +
> +    dump = kmalloc(N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> +    reg_dumps = kmalloc(1024, GFP_KERNEL);
> +
> +    if (dump == NULL || reg_dumps == NULL)
> +        return -ENOMEM;
> +
> +    DUMP_REG(mmGRBM_STATUS2);
> +    DUMP_REG(mmGRBM_STATUS_SE0);
> +    DUMP_REG(mmGRBM_STATUS_SE1);
> +    DUMP_REG(mmGRBM_STATUS_SE2);
> +    DUMP_REG(mmGRBM_STATUS_SE3);
> +    DUMP_REG(mmSDMA0_STATUS_REG);
> +    DUMP_REG(mmSDMA1_STATUS_REG);
> +    DUMP_REG(mmCP_STAT);
> +    DUMP_REG(mmCP_STALLED_STAT1);
> +    DUMP_REG(mmCP_STALLED_STAT1);
> +    DUMP_REG(mmCP_STALLED_STAT3);
> +    DUMP_REG(mmCP_CPC_STATUS);
> +    DUMP_REG(mmCP_CPC_BUSY_STAT);
> +    DUMP_REG(mmCP_CPC_STALLED_STAT1);
> +    DUMP_REG(mmCP_CPF_STATUS);
> +    DUMP_REG(mmCP_CPF_BUSY_STAT);
> +    DUMP_REG(mmCP_CPF_STALLED_STAT1);
> +
> +    n_regs = i;
> +
> +    for (i = 0; i < n_regs; i++)
> +        sprintf(reg_dumps + strlen(reg_dumps), "%08x: %08x, ", 
> dump[i][0], dump[i][1]);
> +
> +    trace_gfx_v10_0_reset_reg_dumps(reg_dumps);
> +    kfree(dump);
> +    kfree(reg_dumps);
> +
> +    return 0;
> +}
> +
>  static int gfx_v10_0_suspend(void *handle)
>  {
>      return gfx_v10_0_hw_fini(handle);
> @@ -9367,6 +9415,7 @@ static const struct amd_ip_funcs 
> gfx_v10_0_ip_funcs = {
>      .set_clockgating_state = gfx_v10_0_set_clockgating_state,
>      .set_powergating_state = gfx_v10_0_set_powergating_state,
>      .get_clockgating_state = gfx_v10_0_get_clockgating_state,
> +    .reset_reg_dumps = gfx_v10_0_reset_reg_dumps,
>  };
>
>  static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h 
> b/drivers/gpu/drm/amd/include/amd_shared.h
> index 257f280d3d53..76d3a55278df 100644
> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> @@ -296,6 +296,7 @@ struct amd_ip_funcs {
>                       enum amd_powergating_state state);
>      void (*get_clockgating_state)(void *handle, u32 *flags);
>      int (*enable_umd_pstate)(void *handle, enum amd_dpm_forced_level 
> *level);
> +    int (*reset_reg_dumps)(void *handle);
>  };
>
>



More information about the amd-gfx mailing list