[Mesa-dev] [PATCH 3/5] radv: Dump command buffer on hang.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Mon Jan 2 19:01:10 UTC 2017
On Mon, Jan 2, 2017 at 7:57 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
> ---
> src/amd/vulkan/radv_cmd_buffer.c | 35 +++++++++++++
> src/amd/vulkan/radv_device.c | 71 +++++++++++++++++++++++----
> src/amd/vulkan/radv_private.h | 5 ++
> src/amd/vulkan/radv_radeon_winsys.h | 2 +
> src/amd/vulkan/si_cmd_buffer.c | 5 ++
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 +++++++++++
> 6 files changed, 139 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index fdb35a0060..651b1dd452 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -32,6 +32,8 @@
> #include "vk_format.h"
> #include "radv_meta.h"
>
> +#include "ac_debug.h"
> +
> static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
> struct radv_image *image,
> VkImageLayout src_layout,
> @@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
> return true;
> }
>
> +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
> +{
> + struct radv_device *device = cmd_buffer->device;
> + struct radeon_winsys_cs *cs = cmd_buffer->cs;
> + uint64_t va;
> +
> + if (!device->trace_bo)
> + return;
> +
> + va = device->ws->buffer_get_va(device->trace_bo);
> +
> + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
> +
> + ++cmd_buffer->state.trace_id;
> + device->ws->cs_add_buffer(cs, device->trace_bo, 8);
> + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
> + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
> + S_370_WR_CONFIRM(1) |
> + S_370_ENGINE_SEL(V_370_ME));
> + radeon_emit(cs, va);
> + radeon_emit(cs, va >> 32);
> + radeon_emit(cs, cmd_buffer->state.trace_id);
> + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
> + radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
> +}
> +
> static void
> radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
> struct radv_pipeline *pipeline)
> @@ -1929,6 +1957,8 @@ void radv_CmdDraw(
> S_0287F0_USE_OPAQUE(0));
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> +
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
> @@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
> radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> static void
> @@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
> radeon_emit(cs, count_va >> 32);
> radeon_emit(cs, stride); /* stride */
> radeon_emit(cs, di_src_sel);
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> static void
> @@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
> radeon_emit(cmd_buffer->cs, 1);
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> void radv_CmdDispatchIndirect(
> @@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
> }
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> void radv_unaligned_dispatch(
> @@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
> S_00B800_PARTIAL_TG_EN(1));
>
> assert(cmd_buffer->cs->cdw <= cdw_max);
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> void radv_CmdEndRenderPass(
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index e57a419cfa..54cedc2943 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
> device->ws->cs_finalize(device->empty_cs[family]);
> }
>
> + if (false) {
> + device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
> + RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
> + if (!device->trace_bo)
> + goto fail;
> +
> + device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
> + if (!device->trace_id_ptr)
> + goto fail;
> + }
> +
> *pDevice = radv_device_to_handle(device);
> return VK_SUCCESS;
>
> fail:
> + if (device->trace_bo)
> + device->ws->buffer_destroy(device->trace_bo);
> +
> for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
> for (unsigned q = 0; q < device->queue_count[i]; q++)
> radv_queue_finish(&device->queues[i][q]);
> if (device->queue_count[i])
> vk_free(&device->alloc, device->queues[i]);
> }
> +
> + if (device->hw_ctx)
> + device->ws->ctx_destroy(device->hw_ctx);
> +
> vk_free(&device->alloc, device);
> return result;
> }
> @@ -780,6 +798,9 @@ void radv_DestroyDevice(
> {
> RADV_FROM_HANDLE(radv_device, device, _device);
>
> + if (device->trace_bo)
> + device->ws->buffer_destroy(device->trace_bo);
> +
> device->ws->ctx_destroy(device->hw_ctx);
> for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
> for (unsigned q = 0; q < device->queue_count[i]; q++)
> @@ -880,10 +901,12 @@ VkResult radv_QueueSubmit(
> struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
> struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
> int ret;
> + uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
>
> for (uint32_t i = 0; i < submitCount; i++) {
> struct radeon_winsys_cs **cs_array;
> bool can_patch = true;
> + uint32_t advance;
>
> if (!pSubmits[i].commandBufferCount)
> continue;
> @@ -900,15 +923,45 @@ VkResult radv_QueueSubmit(
> if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
> can_patch = false;
> }
> - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
> - pSubmits[i].commandBufferCount,
> - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> - pSubmits[i].waitSemaphoreCount,
> - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> - pSubmits[i].signalSemaphoreCount,
> - can_patch, base_fence);
> - if (ret)
> - radv_loge("failed to submit CS %d\n", i);
> +
> + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
> + advance = MIN2(max_cs_submission,
> + pSubmits[i].commandBufferCount - j);
> + bool b = j == 0;
> + bool e = j + advance == pSubmits[i].commandBufferCount;
> +
> + if (queue->device->trace_bo)
> + *queue->device->trace_id_ptr = 0;
> +
> + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
> + pSubmits[i].commandBufferCount,
> + (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> + b ? pSubmits[i].waitSemaphoreCount : 0,
> + (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> + e ? pSubmits[i].signalSemaphoreCount : 0,
> + can_patch, base_fence);
> +
> + if (ret) {
> + radv_loge("failed to submit CS %d\n", i);
> + abort();
> + }
> + if (queue->device->trace_bo) {
> + bool success = queue->device->ws->ctx_wait_idle(
> + queue->device->hw_ctx,
> + radv_queue_family_to_ring(
> + queue->queue_family_index),
> + queue->queue_idx);
> +
> + if (!success) { /* Hang */
> + FILE *f = fopen("/home/bas/hang.log", "w");
Just noticed that I forgot to make this path non-hardcoded. Will send a v2 soon.
> + fprintf(f, "Trace ID: %x\n", *queue->device->trace_id_ptr);
> + queue->device->ws->cs_dump(cs_array[j], f,
> + *queue->device->trace_id_ptr);
> + fclose(f);
> + abort();
> + }
> + }
> + }
> free(cs_array);
> }
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index f76d38dba6..9bae7494a9 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -487,6 +487,9 @@ struct radv_device {
> float sample_locations_4x[4][2];
> float sample_locations_8x[8][2];
> float sample_locations_16x[16][2];
> +
> + struct radeon_winsys_bo *trace_bo;
> + uint32_t *trace_id_ptr;
> };
>
> struct radv_device_memory {
> @@ -677,6 +680,7 @@ struct radv_cmd_state {
> unsigned active_occlusion_queries;
> float offset_scale;
> uint32_t descriptors_dirty;
> + uint32_t trace_id;
> };
>
> struct radv_cmd_pool {
> @@ -771,6 +775,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
> void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
> struct radeon_winsys_bo *bo,
> uint64_t offset, uint64_t size, uint32_t value);
> +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
>
> /*
> * Takes x,y,z as exact numbers of invocations, instead of blocks.
> diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
> index 4b738b8cf4..a0b5092e30 100644
> --- a/src/amd/vulkan/radv_radeon_winsys.h
> +++ b/src/amd/vulkan/radv_radeon_winsys.h
> @@ -319,6 +319,8 @@ struct radeon_winsys {
> void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
> struct radeon_winsys_cs *child);
>
> + void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
> +
> int (*surface_init)(struct radeon_winsys *ws,
> struct radeon_surf *surf);
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index e3f883f50b..a483ad9fd3 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
> }
> }
>
> + if (cmd_buffer->state.flush_bits)
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> cmd_buffer->state.flush_bits = 0;
> }
>
> @@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
> radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> radeon_emit(cs, 0);
> }
> +
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
> @@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
> radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> radeon_emit(cs, 0);
> }
> + radv_cmd_buffer_trace_emit(cmd_buffer);
> }
>
> static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index b24aa99749..99b16192bc 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -27,6 +27,7 @@
> #include <amdgpu_drm.h>
> #include <assert.h>
>
> +#include "ac_debug.h"
> #include "amdgpu_id.h"
> #include "radv_radeon_winsys.h"
> #include "radv_amdgpu_cs.h"
> @@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
> return ret;
> }
>
> +
> +static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
> +{
> + void *ret = NULL;
> + for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
> + struct radv_amdgpu_winsys_bo *bo;
> +
> + bo = (struct radv_amdgpu_winsys_bo*)
> + (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
> + if (addr >= bo->va && addr - bo->va < bo->size) {
> + if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
> + return (char *)ret + (addr - bo->va);
> + }
> + }
> + return ret;
> +}
> +
> +static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
> + FILE* file,
> + uint32_t trace_id)
> +{
> + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
> +
> + ac_parse_ib(file,
> + radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
> + cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class);
> +}
> +
> static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
> {
> struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
> @@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
> ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
> ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
> ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
> + ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
> ws->base.create_fence = radv_amdgpu_create_fence;
> ws->base.destroy_fence = radv_amdgpu_destroy_fence;
> ws->base.create_sem = radv_amdgpu_create_sem;
> --
> 2.11.0
>
More information about the mesa-dev mailing list