[Mesa-dev] [PATCH 3/5] radv: Dump command buffer on hang.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Mon Jan 2 19:01:10 UTC 2017


On Mon, Jan 2, 2017 at 7:57 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c              | 35 +++++++++++++
>  src/amd/vulkan/radv_device.c                  | 71 +++++++++++++++++++++++----
>  src/amd/vulkan/radv_private.h                 |  5 ++
>  src/amd/vulkan/radv_radeon_winsys.h           |  2 +
>  src/amd/vulkan/si_cmd_buffer.c                |  5 ++
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 +++++++++++
>  6 files changed, 139 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index fdb35a0060..651b1dd452 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -32,6 +32,8 @@
>  #include "vk_format.h"
>  #include "radv_meta.h"
>
> +#include "ac_debug.h"
> +
>  static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
>                                          struct radv_image *image,
>                                          VkImageLayout src_layout,
> @@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
>         return true;
>  }
>
> +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
> +{
> +       struct radv_device *device = cmd_buffer->device;
> +       struct radeon_winsys_cs *cs = cmd_buffer->cs;
> +       uint64_t va;
> +
> +       if (!device->trace_bo)
> +               return;
> +
> +       va = device->ws->buffer_get_va(device->trace_bo);
> +
> +       MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
> +
> +       ++cmd_buffer->state.trace_id;
> +       device->ws->cs_add_buffer(cs, device->trace_bo, 8);
> +       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
> +       radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
> +                   S_370_WR_CONFIRM(1) |
> +                   S_370_ENGINE_SEL(V_370_ME));
> +       radeon_emit(cs, va);
> +       radeon_emit(cs, va >> 32);
> +       radeon_emit(cs, cmd_buffer->state.trace_id);
> +       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
> +       radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
> +}
> +
>  static void
>  radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
>                                struct radv_pipeline *pipeline)
> @@ -1929,6 +1957,8 @@ void radv_CmdDraw(
>                     S_0287F0_USE_OPAQUE(0));
>
>         assert(cmd_buffer->cs->cdw <= cdw_max);
> +
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
> @@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
>         radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
>
>         assert(cmd_buffer->cs->cdw <= cdw_max);
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  static void
> @@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
>         radeon_emit(cs, count_va >> 32);
>         radeon_emit(cs, stride); /* stride */
>         radeon_emit(cs, di_src_sel);
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  static void
> @@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
>         radeon_emit(cmd_buffer->cs, 1);
>
>         assert(cmd_buffer->cs->cdw <= cdw_max);
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  void radv_CmdDispatchIndirect(
> @@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
>         }
>
>         assert(cmd_buffer->cs->cdw <= cdw_max);
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  void radv_unaligned_dispatch(
> @@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
>                                     S_00B800_PARTIAL_TG_EN(1));
>
>         assert(cmd_buffer->cs->cdw <= cdw_max);
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  void radv_CmdEndRenderPass(
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index e57a419cfa..54cedc2943 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
>                 device->ws->cs_finalize(device->empty_cs[family]);
>         }
>
> +       if (false) {
> +               device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
> +                                                            RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
> +               if (!device->trace_bo)
> +                       goto fail;
> +
> +               device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
> +               if (!device->trace_id_ptr)
> +                       goto fail;
> +       }
> +
>         *pDevice = radv_device_to_handle(device);
>         return VK_SUCCESS;
>
>  fail:
> +       if (device->trace_bo)
> +               device->ws->buffer_destroy(device->trace_bo);
> +
>         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
>                 for (unsigned q = 0; q < device->queue_count[i]; q++)
>                         radv_queue_finish(&device->queues[i][q]);
>                 if (device->queue_count[i])
>                         vk_free(&device->alloc, device->queues[i]);
>         }
> +
> +       if (device->hw_ctx)
> +               device->ws->ctx_destroy(device->hw_ctx);
> +
>         vk_free(&device->alloc, device);
>         return result;
>  }
> @@ -780,6 +798,9 @@ void radv_DestroyDevice(
>  {
>         RADV_FROM_HANDLE(radv_device, device, _device);
>
> +       if (device->trace_bo)
> +               device->ws->buffer_destroy(device->trace_bo);
> +
>         device->ws->ctx_destroy(device->hw_ctx);
>         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
>                 for (unsigned q = 0; q < device->queue_count[i]; q++)
> @@ -880,10 +901,12 @@ VkResult radv_QueueSubmit(
>         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
>         struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
>         int ret;
> +       uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
>
>         for (uint32_t i = 0; i < submitCount; i++) {
>                 struct radeon_winsys_cs **cs_array;
>                 bool can_patch = true;
> +               uint32_t advance;
>
>                 if (!pSubmits[i].commandBufferCount)
>                         continue;
> @@ -900,15 +923,45 @@ VkResult radv_QueueSubmit(
>                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
>                                 can_patch = false;
>                 }
> -               ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
> -                                                  pSubmits[i].commandBufferCount,
> -                                                  (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> -                                                  pSubmits[i].waitSemaphoreCount,
> -                                                  (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> -                                                  pSubmits[i].signalSemaphoreCount,
> -                                                  can_patch, base_fence);
> -               if (ret)
> -                       radv_loge("failed to submit CS %d\n", i);
> +
> +               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
> +                       advance = MIN2(max_cs_submission,
> +                                      pSubmits[i].commandBufferCount - j);
> +                       bool b = j == 0;
> +                       bool e = j + advance == pSubmits[i].commandBufferCount;
> +
> +                       if (queue->device->trace_bo)
> +                               *queue->device->trace_id_ptr = 0;
> +
> +                       ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
> +                                                       pSubmits[i].commandBufferCount,
> +                                                       (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> +                                                       b ? pSubmits[i].waitSemaphoreCount : 0,
> +                                                       (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> +                                                       e ? pSubmits[i].signalSemaphoreCount : 0,
> +                                                       can_patch, base_fence);
> +
> +                       if (ret) {
> +                               radv_loge("failed to submit CS %d\n", i);
> +                               abort();
> +                       }
> +                       if (queue->device->trace_bo) {
> +                               bool success = queue->device->ws->ctx_wait_idle(
> +                                                       queue->device->hw_ctx,
> +                                                       radv_queue_family_to_ring(
> +                                                               queue->queue_family_index),
> +                                                       queue->queue_idx);
> +
> +                               if (!success) { /* Hang */
> +                                       FILE *f = fopen("/home/bas/hang.log", "w");

Just noticed that I forgot to replace this hard-coded hang-log path with something configurable. Will send a v2 soon.

> +                                       fprintf(f, "Trace ID: %x\n", *queue->device->trace_id_ptr);
> +                                       queue->device->ws->cs_dump(cs_array[j], f,
> +                                                                  *queue->device->trace_id_ptr);
> +                                       fclose(f);
> +                                       abort();
> +                               }
> +                       }
> +               }
>                 free(cs_array);
>         }
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index f76d38dba6..9bae7494a9 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -487,6 +487,9 @@ struct radv_device {
>         float sample_locations_4x[4][2];
>         float sample_locations_8x[8][2];
>         float sample_locations_16x[16][2];
> +
> +       struct radeon_winsys_bo                      *trace_bo;
> +       uint32_t                                     *trace_id_ptr;
>  };
>
>  struct radv_device_memory {
> @@ -677,6 +680,7 @@ struct radv_cmd_state {
>         unsigned                                     active_occlusion_queries;
>         float                                        offset_scale;
>         uint32_t                                      descriptors_dirty;
> +       uint32_t                                      trace_id;
>  };
>
>  struct radv_cmd_pool {
> @@ -771,6 +775,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
>  void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
>                       struct radeon_winsys_bo *bo,
>                       uint64_t offset, uint64_t size, uint32_t value);
> +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
>
>  /*
>   * Takes x,y,z as exact numbers of invocations, instead of blocks.
> diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
> index 4b738b8cf4..a0b5092e30 100644
> --- a/src/amd/vulkan/radv_radeon_winsys.h
> +++ b/src/amd/vulkan/radv_radeon_winsys.h
> @@ -319,6 +319,8 @@ struct radeon_winsys {
>         void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
>                                     struct radeon_winsys_cs *child);
>
> +       void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
> +
>         int (*surface_init)(struct radeon_winsys *ws,
>                             struct radeon_surf *surf);
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index e3f883f50b..a483ad9fd3 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
>                 }
>         }
>
> +       if (cmd_buffer->state.flush_bits)
> +               radv_cmd_buffer_trace_emit(cmd_buffer);
>         cmd_buffer->state.flush_bits = 0;
>  }
>
> @@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
>                 radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>                 radeon_emit(cs, 0);
>         }
> +
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
> @@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
>                 radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
>                 radeon_emit(cs, 0);
>         }
> +       radv_cmd_buffer_trace_emit(cmd_buffer);
>  }
>
>  static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index b24aa99749..99b16192bc 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -27,6 +27,7 @@
>  #include <amdgpu_drm.h>
>  #include <assert.h>
>
> +#include "ac_debug.h"
>  #include "amdgpu_id.h"
>  #include "radv_radeon_winsys.h"
>  #include "radv_amdgpu_cs.h"
> @@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
>         return ret;
>  }
>
> +
> +static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
> +{
> +       void *ret = NULL;
> +       for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
> +               struct radv_amdgpu_winsys_bo *bo;
> +
> +               bo = (struct radv_amdgpu_winsys_bo*)
> +                      (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
> +               if (addr >= bo->va && addr - bo->va < bo->size) {
> +                       if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
> +                               return (char *)ret + (addr - bo->va);
> +               }
> +       }
> +       return ret;
> +}
> +
> +static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
> +                                       FILE* file,
> +                                       uint32_t trace_id)
> +{
> +       struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
> +
> +       ac_parse_ib(file,
> +                   radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
> +                   cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class);
> +}
> +
>  static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
>  {
>         struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
> @@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
>         ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
>         ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
>         ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
> +       ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
>         ws->base.create_fence = radv_amdgpu_create_fence;
>         ws->base.destroy_fence = radv_amdgpu_destroy_fence;
>         ws->base.create_sem = radv_amdgpu_create_sem;
> --
> 2.11.0
>


More information about the mesa-dev mailing list