Mesa (master): radv/gfx10: allocate GDS/OA buffer objects for NGG streamout

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Sep 16 10:12:47 UTC 2019


Module: Mesa
Branch: master
Commit: e1dc3ab753480db414a68ef7944f00cfc75d5882
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1dc3ab753480db414a68ef7944f00cfc75d5882

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Mon Sep  9 10:26:54 2019 +0200

radv/gfx10: allocate GDS/OA buffer objects for NGG streamout

This allocates two BOs for GFX10 NGG streamout.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_cmd_buffer.c |  4 +++
 src/amd/vulkan/radv_device.c     | 66 +++++++++++++++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h    |  4 +++
 3 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f55d78b9250..22b89760bbb 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -337,6 +337,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->esgs_ring_size_needed = 0;
 	cmd_buffer->gsvs_ring_size_needed = 0;
 	cmd_buffer->tess_rings_needed = false;
+	cmd_buffer->gds_needed = false;
 	cmd_buffer->sample_positions_needed = false;
 
 	if (cmd_buffer->upload.upload_bo)
@@ -5815,6 +5816,9 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
 	    ((old_streamout_enabled != so->streamout_enabled) ||
 	     (old_hw_enabled_mask != so->hw_enabled_mask)))
 		radv_emit_streamout_enable(cmd_buffer);
+
+	if (cmd_buffer->device->physical_device->use_ngg_streamout)
+		cmd_buffer->gds_needed = true;
 }
 
 static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 53a08bcdc5a..7786a71afc8 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1746,6 +1746,10 @@ radv_queue_finish(struct radv_queue *queue)
 		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
 	if (queue->tess_rings_bo)
 		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
+	if (queue->gds_bo)
+		queue->device->ws->buffer_destroy(queue->gds_bo);
+	if (queue->gds_oa_bo)
+		queue->device->ws->buffer_destroy(queue->gds_oa_bo);
 	if (queue->compute_scratch_bo)
 		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
 }
@@ -2598,6 +2602,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		     uint32_t esgs_ring_size,
 		     uint32_t gsvs_ring_size,
 		     bool needs_tess_rings,
+		     bool needs_gds,
 		     bool needs_sample_positions,
 		     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                      struct radeon_cmdbuf **initial_preamble_cs,
@@ -2609,8 +2614,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	struct radeon_winsys_bo *esgs_ring_bo = NULL;
 	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
 	struct radeon_winsys_bo *tess_rings_bo = NULL;
+	struct radeon_winsys_bo *gds_bo = NULL;
+	struct radeon_winsys_bo *gds_oa_bo = NULL;
 	struct radeon_cmdbuf *dest_cs[3] = {0};
-	bool add_tess_rings = false, add_sample_positions = false;
+	bool add_tess_rings = false, add_gds = false, add_sample_positions = false;
 	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
 	unsigned max_offchip_buffers;
 	unsigned hs_offchip_param = 0;
@@ -2620,6 +2627,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		if (needs_tess_rings)
 			add_tess_rings = true;
 	}
+	if (!queue->has_gds) {
+		if (needs_gds)
+			add_gds = true;
+	}
 	if (!queue->has_sample_positions) {
 		if (needs_sample_positions)
 			add_sample_positions = true;
@@ -2635,7 +2646,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	    compute_scratch_size <= queue->compute_scratch_size &&
 	    esgs_ring_size <= queue->esgs_ring_size &&
 	    gsvs_ring_size <= queue->gsvs_ring_size &&
-	    !add_tess_rings && !add_sample_positions &&
+	    !add_tess_rings && !add_gds && !add_sample_positions &&
 	    queue->initial_preamble_cs) {
 		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
 		*initial_preamble_cs = queue->initial_preamble_cs;
@@ -2711,6 +2722,32 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		tess_rings_bo = queue->tess_rings_bo;
 	}
 
+	if (add_gds) {
+		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
+
+		/* 4 streamout GDS counters.
+		 * We need 256B (64 dw) of GDS, otherwise streamout hangs.
+		 */
+		gds_bo = queue->device->ws->buffer_create(queue->device->ws,
+							  256, 4,
+							  RADEON_DOMAIN_GDS,
+							  ring_bo_flags,
+							  RADV_BO_PRIORITY_SCRATCH);
+		if (!gds_bo)
+			goto fail;
+
+		gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
+							     4, 1,
+							     RADEON_DOMAIN_OA,
+							     ring_bo_flags,
+							     RADV_BO_PRIORITY_SCRATCH);
+		if (!gds_oa_bo)
+			goto fail;
+	} else {
+		gds_bo = queue->gds_bo;
+		gds_oa_bo = queue->gds_oa_bo;
+	}
+
 	if (scratch_bo != queue->scratch_bo ||
 	    esgs_ring_bo != queue->esgs_ring_bo ||
 	    gsvs_ring_bo != queue->gsvs_ring_bo ||
@@ -2801,6 +2838,11 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
 		radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
 
+		if (gds_bo)
+			radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
+		if (gds_oa_bo)
+			radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
+
 		if (i == 0) {
 			si_cs_emit_cache_flush(cs,
 			                       queue->device->physical_device->rad_info.chip_class,
@@ -2876,6 +2918,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		queue->has_tess_rings = true;
 	}
 
+	if (gds_bo != queue->gds_bo) {
+		queue->gds_bo = gds_bo;
+		queue->has_gds = true;
+	}
+
+	if (gds_oa_bo != queue->gds_oa_bo)
+		queue->gds_oa_bo = gds_oa_bo;
+
 	if (descriptor_bo != queue->descriptor_bo) {
 		if (queue->descriptor_bo)
 			queue->device->ws->buffer_destroy(queue->descriptor_bo);
@@ -2908,6 +2958,11 @@ fail:
 		queue->device->ws->buffer_destroy(gsvs_ring_bo);
 	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
 		queue->device->ws->buffer_destroy(tess_rings_bo);
+	if (gds_bo && gds_bo != queue->gds_bo)
+		queue->device->ws->buffer_destroy(gds_bo);
+	if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
+		queue->device->ws->buffer_destroy(gds_oa_bo);
+
 	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }
 
@@ -3070,6 +3125,7 @@ VkResult radv_QueueSubmit(
 	VkResult result;
 	bool fence_emitted = false;
 	bool tess_rings_needed = false;
+	bool gds_needed = false;
 	bool sample_positions_needed = false;
 
 	/* Do this first so failing to allocate scratch buffers can't result in
@@ -3085,14 +3141,16 @@ VkResult radv_QueueSubmit(
 			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
 			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
 			tess_rings_needed |= cmd_buffer->tess_rings_needed;
+			gds_needed |= cmd_buffer->gds_needed;
 			sample_positions_needed |= cmd_buffer->sample_positions_needed;
 		}
 	}
 
 	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
 	                              esgs_ring_size, gsvs_ring_size, tess_rings_needed,
-				      sample_positions_needed, &initial_flush_preamble_cs,
-	                              &initial_preamble_cs, &continue_preamble_cs);
+				      gds_needed, sample_positions_needed,
+				      &initial_flush_preamble_cs,
+				      &initial_preamble_cs, &continue_preamble_cs);
 	if (result != VK_SUCCESS)
 		return result;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 8b612155621..03dc9e02145 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -663,6 +663,7 @@ struct radv_queue {
 	uint32_t esgs_ring_size;
 	uint32_t gsvs_ring_size;
 	bool has_tess_rings;
+	bool has_gds;
 	bool has_sample_positions;
 
 	struct radeon_winsys_bo *scratch_bo;
@@ -671,6 +672,8 @@ struct radv_queue {
 	struct radeon_winsys_bo *esgs_ring_bo;
 	struct radeon_winsys_bo *gsvs_ring_bo;
 	struct radeon_winsys_bo *tess_rings_bo;
+	struct radeon_winsys_bo *gds_bo;
+	struct radeon_winsys_bo *gds_oa_bo;
 	struct radeon_cmdbuf *initial_preamble_cs;
 	struct radeon_cmdbuf *initial_full_flush_preamble_cs;
 	struct radeon_cmdbuf *continue_preamble_cs;
@@ -1223,6 +1226,7 @@ struct radv_cmd_buffer {
 	uint32_t esgs_ring_size_needed;
 	uint32_t gsvs_ring_size_needed;
 	bool tess_rings_needed;
+	bool gds_needed; /* for GFX10 streamout */
 	bool sample_positions_needed;
 
 	VkResult record_result;




More information about the mesa-commit mailing list