[PATCH 06/14] drm/amdgpu: Add kfd2kgd APIs for dumping HQDs

Oded Gabbay oded.gabbay at gmail.com
Tue Dec 5 08:23:33 UTC 2017


On Tue, Nov 28, 2017 at 1:29 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> This can be used by KFD for debugging features, such as dumping
> HQDs in debugfs.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 71 ++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 80 +++++++++++++++++++++++
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 14 ++++
>  3 files changed, 165 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 14333af..12feba8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -105,8 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>                         uint32_t queue_id, uint32_t __user *wptr,
>                         uint32_t wptr_shift, uint32_t wptr_mask,
>                         struct mm_struct *mm);
> +static int kgd_hqd_dump(struct kgd_dev *kgd,
> +                       uint32_t pipe_id, uint32_t queue_id,
> +                       uint32_t (**dump)[2], uint32_t *n_regs);
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>                              uint32_t __user *wptr, struct mm_struct *mm);
> +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
> +                            uint32_t engine_id, uint32_t queue_id,
> +                            uint32_t (**dump)[2], uint32_t *n_regs);
>  static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
>                                 uint32_t pipe_id, uint32_t queue_id);
>
> @@ -178,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .init_interrupts = kgd_init_interrupts,
>         .hqd_load = kgd_hqd_load,
>         .hqd_sdma_load = kgd_hqd_sdma_load,
> +       .hqd_dump = kgd_hqd_dump,
> +       .hqd_sdma_dump = kgd_hqd_sdma_dump,
>         .hqd_is_occupied = kgd_hqd_is_occupied,
>         .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
>         .hqd_destroy = kgd_hqd_destroy,
> @@ -376,6 +384,42 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>         return 0;
>  }
>
> +static int kgd_hqd_dump(struct kgd_dev *kgd,
> +                       uint32_t pipe_id, uint32_t queue_id,
> +                       uint32_t (**dump)[2], uint32_t *n_regs)
> +{
> +       struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +       uint32_t i = 0, reg;
> +#define HQD_N_REGS (35+4)
> +#define DUMP_REG(addr) do {                            \
> +               if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
> +                       break;                          \
> +               (*dump)[i][0] = (addr) << 2;            \
> +               (*dump)[i++][1] = RREG32(addr);         \
> +       } while (0)
> +
> +       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> +       if (*dump == NULL)
> +               return -ENOMEM;
> +
> +       acquire_queue(kgd, pipe_id, queue_id);
> +
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
> +
> +       for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
> +               DUMP_REG(reg);
> +
> +       release_queue(kgd);
> +
> +       WARN_ON_ONCE(i != HQD_N_REGS);
> +       *n_regs = i;
> +
> +       return 0;
> +}
> +
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>                              uint32_t __user *wptr, struct mm_struct *mm)
>  {
> @@ -440,6 +484,33 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>         return 0;
>  }
>
> +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
> +                            uint32_t engine_id, uint32_t queue_id,
> +                            uint32_t (**dump)[2], uint32_t *n_regs)
> +{
> +       struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +       uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
> +               queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
> +       uint32_t i = 0, reg;
> +#undef HQD_N_REGS
> +#define HQD_N_REGS (19+4)
> +
> +       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> +       if (*dump == NULL)
> +               return -ENOMEM;
> +
> +       for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
> +               DUMP_REG(sdma_offset + reg);
> +       for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
> +            reg++)
> +               DUMP_REG(sdma_offset + reg);
> +
> +       WARN_ON_ONCE(i != HQD_N_REGS);
> +       *n_regs = i;
> +
> +       return 0;
> +}
> +
>  static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
>                                 uint32_t pipe_id, uint32_t queue_id)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 1d989e4..b380495 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -64,8 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>                         uint32_t queue_id, uint32_t __user *wptr,
>                         uint32_t wptr_shift, uint32_t wptr_mask,
>                         struct mm_struct *mm);
> +static int kgd_hqd_dump(struct kgd_dev *kgd,
> +                       uint32_t pipe_id, uint32_t queue_id,
> +                       uint32_t (**dump)[2], uint32_t *n_regs);
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>                              uint32_t __user *wptr, struct mm_struct *mm);
> +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
> +                            uint32_t engine_id, uint32_t queue_id,
> +                            uint32_t (**dump)[2], uint32_t *n_regs);
>  static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
>                 uint32_t pipe_id, uint32_t queue_id);
>  static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
> @@ -137,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>         .init_interrupts = kgd_init_interrupts,
>         .hqd_load = kgd_hqd_load,
>         .hqd_sdma_load = kgd_hqd_sdma_load,
> +       .hqd_dump = kgd_hqd_dump,
> +       .hqd_sdma_dump = kgd_hqd_sdma_dump,
>         .hqd_is_occupied = kgd_hqd_is_occupied,
>         .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
>         .hqd_destroy = kgd_hqd_destroy,
> @@ -365,6 +373,42 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>         return 0;
>  }
>
> +static int kgd_hqd_dump(struct kgd_dev *kgd,
> +                       uint32_t pipe_id, uint32_t queue_id,
> +                       uint32_t (**dump)[2], uint32_t *n_regs)
> +{
> +       struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +       uint32_t i = 0, reg;
> +#define HQD_N_REGS (54+4)
> +#define DUMP_REG(addr) do {                            \
> +               if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
> +                       break;                          \
> +               (*dump)[i][0] = (addr) << 2;            \
> +               (*dump)[i++][1] = RREG32(addr);         \
> +       } while (0)
> +
> +       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> +       if (*dump == NULL)
> +               return -ENOMEM;
> +
> +       acquire_queue(kgd, pipe_id, queue_id);
> +
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
> +       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
> +
> +       for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
> +               DUMP_REG(reg);
> +
> +       release_queue(kgd);
> +
> +       WARN_ON_ONCE(i != HQD_N_REGS);
> +       *n_regs = i;
> +
> +       return 0;
> +}
> +
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>                              uint32_t __user *wptr, struct mm_struct *mm)
>  {
> @@ -428,6 +472,42 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
>         return 0;
>  }
>
> +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
> +                            uint32_t engine_id, uint32_t queue_id,
> +                            uint32_t (**dump)[2], uint32_t *n_regs)
> +{
> +       struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +       uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
> +               queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
> +       uint32_t i = 0, reg;
> +#undef HQD_N_REGS
> +#define HQD_N_REGS (19+4+2+3+7)
> +
> +       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
> +       if (*dump == NULL)
> +               return -ENOMEM;
> +
> +       for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
> +               DUMP_REG(sdma_offset + reg);
> +       for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
> +            reg++)
> +               DUMP_REG(sdma_offset + reg);
> +       for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
> +            reg++)
> +               DUMP_REG(sdma_offset + reg);
> +       for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
> +            reg++)
> +               DUMP_REG(sdma_offset + reg);
> +       for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
> +            reg++)
> +               DUMP_REG(sdma_offset + reg);
> +
> +       WARN_ON_ONCE(i != HQD_N_REGS);
> +       *n_regs = i;
> +
> +       return 0;
> +}
> +
>  static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
>                                 uint32_t pipe_id, uint32_t queue_id)
>  {
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index c6d4e64..fe3079a 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -131,6 +131,12 @@ struct tile_config {
>   * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
>   * used only for no HWS mode.
>   *
> + * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
> + * Array is allocated with kmalloc, needs to be freed with kfree by caller.
> + *
> + * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs.
> + * Array is allocated with kmalloc, needs to be freed with kfree by caller.
> + *
>   * @hqd_is_occupies: Checks if a hqd slot is occupied.
>   *
>   * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
> @@ -187,6 +193,14 @@ struct kfd2kgd_calls {
>         int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
>                              uint32_t __user *wptr, struct mm_struct *mm);
>
> +       int (*hqd_dump)(struct kgd_dev *kgd,
> +                       uint32_t pipe_id, uint32_t queue_id,
> +                       uint32_t (**dump)[2], uint32_t *n_regs);
> +
> +       int (*hqd_sdma_dump)(struct kgd_dev *kgd,
> +                            uint32_t engine_id, uint32_t queue_id,
> +                            uint32_t (**dump)[2], uint32_t *n_regs);
> +
>         bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
>                                 uint32_t pipe_id, uint32_t queue_id);
>
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>


More information about the amd-gfx mailing list