Mesa (master): radv: improve reporting faulty pipelines when a GPU hang is detected

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Sep 1 06:49:15 UTC 2020


Module: Mesa
Branch: master
Commit: aa675cdc91fe1d317650c279b3470c0081e85527
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa675cdc91fe1d317650c279b3470c0081e85527

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Wed Aug 26 17:34:28 2020 +0200

radv: improve reporting faulty pipelines when a GPU hang is detected

Because the driver now waits for idle after every draw/dispatch
call, we shouldn't report gfx pipelines when the GPU hang happens
after a dispatch (or vice versa).

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by:  Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6471>

---

 src/amd/vulkan/radv_cmd_buffer.c |  9 ++--
 src/amd/vulkan/radv_debug.c      | 88 ++++++++--------------------------------
 2 files changed, 24 insertions(+), 73 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 873549677c5..792462ed9e2 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -626,14 +626,17 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
 
 static void
 radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
-		   struct radv_pipeline *pipeline, enum ring_type ring)
+		   struct radv_pipeline *pipeline)
 {
 	struct radv_device *device = cmd_buffer->device;
+	enum ring_type ring;
 	uint32_t data[2];
 	uint64_t va;
 
 	va = radv_buffer_get_va(device->trace_bo);
 
+	ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+
 	switch (ring) {
 	case RING_GFX:
 		va += 8;
@@ -1313,7 +1316,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 				   pipeline->gs_copy_shader->bo);
 
 	if (unlikely(cmd_buffer->device->trace_bo))
-		radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
+		radv_save_pipeline(cmd_buffer, pipeline);
 
 	cmd_buffer->state.emitted_pipeline = pipeline;
 
@@ -4174,7 +4177,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 			   pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
 
 	if (unlikely(cmd_buffer->device->trace_bo))
-		radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+		radv_save_pipeline(cmd_buffer, pipeline);
 }
 
 static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index d50480aa47f..5bee83021f6 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -48,8 +48,8 @@
  *
  * [0]: primary trace ID
  * [1]: secondary trace ID
- * [2-3]: 64-bit GFX pipeline pointer
- * [4-5]: 64-bit COMPUTE pipeline pointer
+ * [2-3]: 64-bit GFX ring pipeline pointer
+ * [4-5]: 64-bit COMPUTE ring pipeline pointer
  * [6-7]: 64-bit descriptor set #0 pointer
  * ...
  * [68-69]: 64-bit descriptor set #31 pointer
@@ -459,61 +459,29 @@ radv_dump_shaders(struct radv_pipeline *pipeline,
 	}
 }
 
-static void
-radv_dump_pipeline_state(struct radv_pipeline *pipeline,
-			 VkShaderStageFlagBits active_stages, FILE *f)
-{
-	radv_dump_shaders(pipeline, active_stages, f);
-	radv_dump_annotated_shaders(pipeline, active_stages, f);
-}
-
-static void
-radv_dump_graphics_state(struct radv_device *device,
-			 struct radv_pipeline *graphics_pipeline,
-			 struct radv_pipeline *compute_pipeline, FILE *f)
-{
-	VkShaderStageFlagBits active_stages;
-
-	if (graphics_pipeline) {
-		active_stages = graphics_pipeline->active_stages;
-		radv_dump_pipeline_state(graphics_pipeline, active_stages, f);
-	}
-
-	if (compute_pipeline) {
-		active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
-		radv_dump_pipeline_state(compute_pipeline, active_stages, f);
-	}
-
-	radv_dump_descriptors(device, f);
-}
-
-static void
-radv_dump_compute_state(struct radv_device *device,
-			struct radv_pipeline *compute_pipeline, FILE *f)
-{
-	VkShaderStageFlagBits active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
-
-	if (!compute_pipeline)
-		return;
-
-	radv_dump_pipeline_state(compute_pipeline, active_stages, f);
-	radv_dump_descriptors(device, f);
-}
-
 static struct radv_pipeline *
-radv_get_saved_graphics_pipeline(struct radv_device *device)
+radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
 {
 	uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+	int offset = ring == RING_GFX ? 1 : 2;
 
-	return *(struct radv_pipeline **)(ptr + 1);
+	return *(struct radv_pipeline **)(ptr + offset);
 }
 
-static struct radv_pipeline *
-radv_get_saved_compute_pipeline(struct radv_device *device)
+static void
+radv_dump_queue_state(struct radv_queue *queue, FILE *f)
 {
-	uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+	enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+	struct radv_pipeline *pipeline;
 
-	return *(struct radv_pipeline **)(ptr + 2);
+	fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
+
+	pipeline = radv_get_saved_pipeline(queue->device, ring);
+	if (pipeline) {
+		radv_dump_shaders(pipeline, pipeline->active_stages, f);
+		radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
+		radv_dump_descriptors(queue->device, f);
+	}
 }
 
 static void
@@ -596,7 +564,6 @@ radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
 void
 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 {
-	struct radv_pipeline *graphics_pipeline, *compute_pipeline;
 	struct radv_device *device = queue->device;
 	enum ring_type ring;
 	uint64_t addr;
@@ -611,9 +578,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 	if (!hang_occurred && !vm_fault_occurred)
 		return;
 
-	graphics_pipeline = radv_get_saved_graphics_pipeline(device);
-	compute_pipeline = radv_get_saved_compute_pipeline(device);
-
 	radv_dump_trace(queue->device, cs);
 
 	fprintf(stderr, "GPU hang report:\n\n");
@@ -628,23 +592,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 	}
 
 	radv_dump_debug_registers(device, stderr);
-
-	switch (ring) {
-	case RING_GFX:
-		fprintf(stderr, "RING_GFX:\n");
-		radv_dump_graphics_state(queue->device,
-					 graphics_pipeline, compute_pipeline,
-					 stderr);
-		break;
-	case RING_COMPUTE:
-		fprintf(stderr, "RING_COMPUTE:\n");
-		radv_dump_compute_state(queue->device,
-					compute_pipeline, stderr);
-		break;
-	default:
-		assert(0);
-		break;
-	}
+	radv_dump_queue_state(queue, stderr);
 
 	abort();
 }



More information about the mesa-commit mailing list