[Mesa-dev] [PATCH v2 3/5] radv: Dump command buffer on hang.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Jan 3 22:05:50 UTC 2017


v2:
  - Now use the filename specified by RADV_TRACE_FILE env var.
  - Use the same var to enable tracing.

I thought we might as well always set the filename explicitly
instead of relying on arbitrary defaults, and at that point
we don't need a separate option to enable the feature.

Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
---
 src/amd/vulkan/radv_cmd_buffer.c              | 35 ++++++++++++
 src/amd/vulkan/radv_device.c                  | 82 ++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h                 |  5 ++
 src/amd/vulkan/radv_radeon_winsys.h           |  2 +
 src/amd/vulkan/si_cmd_buffer.c                |  5 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 ++++++++++
 6 files changed, 150 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fdb35a0060..651b1dd452 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -32,6 +32,8 @@
 #include "vk_format.h"
 #include "radv_meta.h"
 
+#include "ac_debug.h"
+
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageLayout src_layout,
@@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 	return true;
 }
 
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_device *device = cmd_buffer->device;
+	struct radeon_winsys_cs *cs = cmd_buffer->cs;
+	uint64_t va;
+	/* Emit a trace point into cs; does nothing when the device has no trace buffer. */
+	if (!device->trace_bo)
+		return;
+
+	va = device->ws->buffer_get_va(device->trace_bo);
+	/* Request space for the 7 dwords emitted below. */
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
+	/* Bump the per-command-buffer id, send it to the trace buffer address (WRITE_DATA), then tag the stream with it (NOP). */
+	++cmd_buffer->state.trace_id;
+	device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+	radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+		    S_370_WR_CONFIRM(1) |
+		    S_370_ENGINE_SEL(V_370_ME));
+	radeon_emit(cs, va); /* address, low part first (assuming radeon_emit takes 32-bit values) */
+	radeon_emit(cs, va >> 32); /* upper 32 bits of the address */
+	radeon_emit(cs, cmd_buffer->state.trace_id);
+	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+	radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
+}
+
 static void
 radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_pipeline *pipeline)
@@ -1929,6 +1957,8 @@ void radv_CmdDraw(
 		    S_0287F0_USE_OPAQUE(0));
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
+
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
@@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
 	radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void
@@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cs, count_va >> 32);
 	radeon_emit(cs, stride); /* stride */
 	radeon_emit(cs, di_src_sel);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void
@@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
 	radeon_emit(cmd_buffer->cs, 1);
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_CmdDispatchIndirect(
@@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
 	}
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_unaligned_dispatch(
@@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
 	                            S_00B800_PARTIAL_TG_EN(1));
 
 	assert(cmd_buffer->cs->cdw <= cdw_max);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_CmdEndRenderPass(
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e57a419cfa..ef8ca1a375 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
 		device->ws->cs_finalize(device->empty_cs[family]);
 	}
 
+	if (getenv("RADV_TRACE_FILE")) {
+		device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
+							     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
+		if (!device->trace_bo)
+			goto fail;
+
+		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
+		if (!device->trace_id_ptr)
+			goto fail;
+	}
+
 	*pDevice = radv_device_to_handle(device);
 	return VK_SUCCESS;
 
 fail:
+	if (device->trace_bo)
+		device->ws->buffer_destroy(device->trace_bo);
+
 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
 		for (unsigned q = 0; q < device->queue_count[i]; q++)
 			radv_queue_finish(&device->queues[i][q]);
 		if (device->queue_count[i])
 			vk_free(&device->alloc, device->queues[i]);
 	}
+
+	if (device->hw_ctx)
+		device->ws->ctx_destroy(device->hw_ctx);
+
 	vk_free(&device->alloc, device);
 	return result;
 }
@@ -780,6 +798,9 @@ void radv_DestroyDevice(
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
 
+	if (device->trace_bo)
+		device->ws->buffer_destroy(device->trace_bo);
+
 	device->ws->ctx_destroy(device->hw_ctx);
 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
 		for (unsigned q = 0; q < device->queue_count[i]; q++)
@@ -869,6 +890,21 @@ void radv_GetDeviceQueue(
 	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
 }
 
+static void radv_dump_trace(struct radv_device *device,
+			    struct radeon_winsys_cs *cs)
+{
+	const char *filename = getenv("RADV_TRACE_FILE"); /* NOTE(review): not NULL-checked; relies on the variable still being set */
+	FILE *f = fopen(filename, "w");
+	if (!f) {
+		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
+		return;
+	}
+	/* Write the id currently stored behind trace_id_ptr, then dump cs using that same id. */
+	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+	device->ws->cs_dump(cs, f, *device->trace_id_ptr);
+	fclose(f);
+}
+
 VkResult radv_QueueSubmit(
 	VkQueue                                     _queue,
 	uint32_t                                    submitCount,
@@ -880,10 +916,12 @@ VkResult radv_QueueSubmit(
 	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
 	struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
 	int ret;
+	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
 
 	for (uint32_t i = 0; i < submitCount; i++) {
 		struct radeon_winsys_cs **cs_array;
 		bool can_patch = true;
+		uint32_t advance;
 
 		if (!pSubmits[i].commandBufferCount)
 			continue;
@@ -900,15 +938,41 @@ VkResult radv_QueueSubmit(
 			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
 				can_patch = false;
 		}
-		ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
-						   pSubmits[i].commandBufferCount,
-						   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
-						   pSubmits[i].waitSemaphoreCount,
-						   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
-						   pSubmits[i].signalSemaphoreCount,
-						   can_patch, base_fence);
-		if (ret)
-			radv_loge("failed to submit CS %d\n", i);
+
+		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+			advance = MIN2(max_cs_submission,
+				       pSubmits[i].commandBufferCount - j);
+			bool b = j == 0; /* first chunk: carries the wait semaphores */
+			bool e = j + advance == pSubmits[i].commandBufferCount; /* last chunk: carries the signal semaphores */
+			/* Clear the trace id before each chunk is submitted. */
+			if (queue->device->trace_bo)
+				*queue->device->trace_id_ptr = 0;
+			/* Submit only this chunk [j, j + advance), not the whole array again. */
+			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+							advance,
+							(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
+							b ? pSubmits[i].waitSemaphoreCount : 0,
+							(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
+							e ? pSubmits[i].signalSemaphoreCount : 0,
+							can_patch, base_fence);
+
+			if (ret) {
+				radv_loge("failed to submit CS %d\n", i);
+				abort();
+			}
+			if (queue->device->trace_bo) {
+				bool success = queue->device->ws->ctx_wait_idle(
+							queue->device->hw_ctx,
+							radv_queue_family_to_ring(
+								queue->queue_family_index),
+							queue->queue_idx);
+
+				if (!success) { /* Hang */
+					radv_dump_trace(queue->device, cs_array[j]);
+					abort();
+				}
+			}
+		}
 		free(cs_array);
 	}
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index f76d38dba6..9bae7494a9 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -487,6 +487,9 @@ struct radv_device {
 	float sample_locations_4x[4][2];
 	float sample_locations_8x[8][2];
 	float sample_locations_16x[16][2];
+
+	struct radeon_winsys_bo                      *trace_bo;
+	uint32_t                                     *trace_id_ptr;
 };
 
 struct radv_device_memory {
@@ -677,6 +680,7 @@ struct radv_cmd_state {
 	unsigned                                     active_occlusion_queries;
 	float					     offset_scale;
 	uint32_t                                      descriptors_dirty;
+	uint32_t                                      trace_id;
 };
 
 struct radv_cmd_pool {
@@ -771,6 +775,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
 		      struct radeon_winsys_bo *bo,
 		      uint64_t offset, uint64_t size, uint32_t value);
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
 
 /*
  * Takes x,y,z as exact numbers of invocations, instead of blocks.
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4b738b8cf4..a0b5092e30 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -319,6 +319,8 @@ struct radeon_winsys {
 	void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
 				    struct radeon_winsys_cs *child);
 
+	void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+
 	int (*surface_init)(struct radeon_winsys *ws,
 			    struct radeon_surf *surf);
 
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index e3f883f50b..a483ad9fd3 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 		}
 	}
 
+	if (cmd_buffer->state.flush_bits)
+		radv_cmd_buffer_trace_emit(cmd_buffer);
 	cmd_buffer->state.flush_bits = 0;
 }
 
@@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index b24aa99749..99b16192bc 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include <amdgpu_drm.h>
 #include <assert.h>
 
+#include "ac_debug.h"
 #include "amdgpu_id.h"
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_cs.h"
@@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 	return ret;
 }
 
+/* Return a CPU pointer for GPU address addr if it lies in one of cs's IB buffers and can be mapped; otherwise return ret as left (initially NULL). */
+static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, uint64_t addr)
+{
+	void *ret = NULL;
+	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) { /* <= : the extra iteration checks the current ib_buffer */
+		struct radv_amdgpu_winsys_bo *bo;
+
+		bo = (struct radv_amdgpu_winsys_bo*)
+		       (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
+		if (addr >= bo->va && addr - bo->va < bo->size) { /* addr falls inside [va, va + size) */
+			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+				return (char *)ret + (addr - bo->va); /* mapping base plus offset into the buffer */
+		}
+	}
+	return ret;
+}
+
+static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
+                                       FILE* file,
+                                       uint32_t trace_id)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	/* Parse the IB at ib_mc_address into file. NOTE(review): a NULL lookup result is passed on unchecked. */
+	ac_parse_ib(file,
+		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
+		    cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class);
+}
+
 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
 {
 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
 	ws->base.create_fence = radv_amdgpu_create_fence;
 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
 	ws->base.create_sem = radv_amdgpu_create_sem;
-- 
2.11.0



More information about the mesa-dev mailing list