[Mesa-dev] [PATCH 25/29] radv: emit geometry ring size and pointers via preamble

Dave Airlie airlied at gmail.com
Mon Jan 30 06:02:59 UTC 2017


From: Dave Airlie <airlied at redhat.com>

This uses the scratch infrastructure to handle the esgs
and gsvs rings.

(This replaces the old code, which did this with patching.)

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c |  28 ++++++
 src/amd/vulkan/radv_device.c     | 199 ++++++++++++++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h    |   8 ++
 3 files changed, 224 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c351636..b1b125f 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1458,6 +1458,8 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 	cmd_buffer->upload.offset = 0;
 
 	cmd_buffer->record_fail = false;
+
+	cmd_buffer->ring_offsets_idx = -1;
 }
 
 VkResult radv_ResetCommandBuffer(
@@ -1644,6 +1646,7 @@ VkResult radv_EndCommandBuffer(
 
 	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
 		si_emit_cache_flush(cmd_buffer);
+
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
 	    cmd_buffer->record_fail)
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1730,6 +1733,20 @@ void radv_CmdBindPipeline(
 		radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
 					&pipeline->dynamic_state,
 					pipeline->dynamic_state_mask);
+
+		if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+			cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+		if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+			cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+		if (radv_pipeline_has_gs(pipeline)) { /* ring_offsets_idx is verification-only state */
+			struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
+									     AC_UD_SCRATCH_RING_OFFSETS);
+			if (cmd_buffer->ring_offsets_idx == -1) /* first GS pipeline seen: remember its slot */
+				cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
+			else if (loc->sgpr_idx != -1) /* later GS pipelines must use the same slot */
+				assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
+		}
 		break;
 	default:
 		assert(!"invalid bind point");
@@ -1882,6 +1899,17 @@ void radv_CmdExecuteCommands(
 		primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
 		                                            secondary->compute_scratch_size_needed);
 
+		if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+			primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+		if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+			primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+
+		if (secondary->ring_offsets_idx != -1) {
+			if (primary->ring_offsets_idx == -1)
+				primary->ring_offsets_idx = secondary->ring_offsets_idx;
+			else
+				assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
+		}
 		primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
 	}
 
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index af16c89..fd4e7f5 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -760,6 +760,10 @@ radv_queue_finish(struct radv_queue *queue)
 		queue->device->ws->buffer_destroy(queue->descriptor_bo);
 	if (queue->scratch_bo)
 		queue->device->ws->buffer_destroy(queue->scratch_bo);
+	if (queue->esgs_ring_bo)
+		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+	if (queue->gsvs_ring_bo)
+		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
 	if (queue->compute_scratch_bo)
 		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
 }
@@ -1042,24 +1046,118 @@ static void radv_dump_trace(struct radv_device *device,
 	fclose(f);
 }
 
+/*
+ * Fill four 4-dword buffer descriptors for the geometry rings, starting at
+ * map[4] (map[0..3] are not written here): two for the esgs ring, then two
+ * for the gsvs ring.  A NULL ring bo is tolerated and yields a base address
+ * of 0, since the caller invokes this when at least one ring exists.
+ */
+static void
+fill_geom_rings(struct radv_queue *queue,
+		uint32_t *map,
+		uint32_t esgs_ring_size,
+		struct radeon_winsys_bo *esgs_ring_bo,
+		uint32_t gsvs_ring_size,
+		struct radeon_winsys_bo *gsvs_ring_bo)
+{
+	/* Only query the VA of rings that were actually allocated. */
+	uint64_t esgs_va = esgs_ring_bo ? queue->device->ws->buffer_get_va(esgs_ring_bo) : 0;
+	uint64_t gsvs_va = gsvs_ring_bo ? queue->device->ws->buffer_get_va(gsvs_ring_bo) : 0;
+	uint32_t *desc = &map[4];
+
+	/* esgs ring, 1st entry: stride 0, num records - size, add tid, swizzle, elsize4, index stride 64 */
+	desc[0] = esgs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(true);
+	desc[2] = esgs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(1) |
+		S_008F0C_INDEX_STRIDE(3) |
+		S_008F0C_ADD_TID_ENABLE(true);
+
+	desc += 4;
+	/* GS entry for ES->GS ring: stride 0, num records - size, elsize0, index stride 0 */
+	desc[0] = esgs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(false);
+	desc[2] = esgs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(0) |
+		S_008F0C_INDEX_STRIDE(0) |
+		S_008F0C_ADD_TID_ENABLE(false);
+
+	desc += 4;
+	/* VS entry for GS->VS ring: stride 0, num records - size, elsize0, index stride 0 */
+	desc[0] = gsvs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(false);
+	desc[2] = gsvs_ring_size;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(0) |
+		S_008F0C_INDEX_STRIDE(0) |
+		S_008F0C_ADD_TID_ENABLE(false);
+	desc += 4;
+
+	/* gsvs ring, 2nd entry: stride gsvs_itemsize, num records 64, elsize 4, index stride 16 */
+	/* shader will patch stride and desc[2], so both are written as 0 here */
+	desc[0] = gsvs_va;
+	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+		S_008F04_STRIDE(0) |
+		S_008F04_SWIZZLE_ENABLE(true);
+	desc[2] = 0;
+	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+		S_008F0C_ELEMENT_SIZE(1) |
+		S_008F0C_INDEX_STRIDE(1) |
+		S_008F0C_ADD_TID_ENABLE(true);
+}
+
 static VkResult
 radv_get_preamble_cs(struct radv_queue *queue,
                      uint32_t scratch_size,
                      uint32_t compute_scratch_size,
+		     uint32_t esgs_ring_size,
+		     uint32_t gsvs_ring_size,
                      struct radeon_winsys_cs **preamble_cs)
 {
 	struct radeon_winsys_bo *scratch_bo = NULL;
 	struct radeon_winsys_bo *descriptor_bo = NULL;
 	struct radeon_winsys_bo *compute_scratch_bo = NULL;
+	struct radeon_winsys_bo *esgs_ring_bo = NULL;
+	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
 	struct radeon_winsys_cs *cs = NULL;
 
-	if (!scratch_size && !compute_scratch_size) {
+	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
 		*preamble_cs = NULL;
 		return VK_SUCCESS;
 	}
 
 	if (scratch_size <= queue->scratch_size &&
-	    compute_scratch_size <= queue->compute_scratch_size) {
+	    compute_scratch_size <= queue->compute_scratch_size &&
+	    esgs_ring_size <= queue->esgs_ring_size &&
+	    gsvs_ring_size <= queue->gsvs_ring_size) {
 		*preamble_cs = queue->preamble_cs;
 		return VK_SUCCESS;
 	}
@@ -1087,9 +1185,40 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	} else
 		compute_scratch_bo = queue->compute_scratch_bo;
 
-	if (scratch_bo != queue->scratch_bo) {
+	if (esgs_ring_size > queue->esgs_ring_size) {
+		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+								esgs_ring_size,
+								4096,
+								RADEON_DOMAIN_VRAM,
+								RADEON_FLAG_NO_CPU_ACCESS);
+		if (!esgs_ring_bo)
+			goto fail;
+	} else
+		esgs_ring_bo = queue->esgs_ring_bo;
+
+
+	if (gsvs_ring_size > queue->gsvs_ring_size) {
+		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
+								gsvs_ring_size,
+								4096,
+								RADEON_DOMAIN_VRAM,
+								RADEON_FLAG_NO_CPU_ACCESS);
+		if (!gsvs_ring_bo)
+			goto fail;
+	} else
+		gsvs_ring_bo = queue->gsvs_ring_bo;
+
+	if (scratch_bo != queue->scratch_bo ||
+	    esgs_ring_bo != queue->esgs_ring_bo ||
+	    gsvs_ring_bo != queue->gsvs_ring_bo) {
+		uint32_t size = 0;
+		if (gsvs_ring_bo || esgs_ring_bo)
+			size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
+		else if (scratch_bo)
+			size = 8; /* 2 dword */
+
 		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
-		                                                 8,
+		                                                 size,
 		                                                 4096,
 		                                                 RADEON_DOMAIN_VRAM,
 		                                                 RADEON_FLAG_CPU_ACCESS);
@@ -1107,22 +1236,49 @@ radv_get_preamble_cs(struct radv_queue *queue,
 	if (scratch_bo)
 		queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
 
+	if (esgs_ring_bo)
+		queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
+
+	if (gsvs_ring_bo)
+		queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
+
 	if (descriptor_bo)
 		queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
 
 	if (descriptor_bo != queue->descriptor_bo) {
-		uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
-		uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
-		                 S_008F04_SWIZZLE_ENABLE(1);
-
 		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
 
-		map[0] = scratch_va;
-		map[1] = rsrc1;
+		if (scratch_bo) {
+			uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
+			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
+				S_008F04_SWIZZLE_ENABLE(1);
+			map[0] = scratch_va;
+			map[1] = rsrc1;
+		}
+
+		if (esgs_ring_bo || gsvs_ring_bo)
+			fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
 
 		queue->device->ws->buffer_unmap(descriptor_bo);
 	}
 
+	if (esgs_ring_bo || gsvs_ring_bo) {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+
+		if (queue->device->physical_device->rad_info.chip_class >= CIK) {
+			radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
+			radeon_emit(cs, esgs_ring_size >> 8);
+			radeon_emit(cs, gsvs_ring_size >> 8);
+		} else {
+			radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
+			radeon_emit(cs, esgs_ring_size >> 8);
+			radeon_emit(cs, gsvs_ring_size >> 8);
+		}
+	}
+
 	if (descriptor_bo) {
 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
 		                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
@@ -1174,6 +1330,20 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		queue->compute_scratch_size = compute_scratch_size;
 	}
 
+	if (esgs_ring_bo != queue->esgs_ring_bo) {
+		if (queue->esgs_ring_bo)
+			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
+		queue->esgs_ring_bo = esgs_ring_bo;
+		queue->esgs_ring_size = esgs_ring_size;
+	}
+
+	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
+		if (queue->gsvs_ring_bo)
+			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
+		queue->gsvs_ring_bo = gsvs_ring_bo;
+		queue->gsvs_ring_size = gsvs_ring_size;
+	}
+
 	if (descriptor_bo != queue->descriptor_bo) {
 		if (queue->descriptor_bo)
 			queue->device->ws->buffer_destroy(queue->descriptor_bo);
@@ -1192,6 +1362,10 @@ fail:
 		queue->device->ws->buffer_destroy(scratch_bo);
 	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
 		queue->device->ws->buffer_destroy(compute_scratch_bo);
+	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
+		queue->device->ws->buffer_destroy(esgs_ring_bo);
+	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
+		queue->device->ws->buffer_destroy(gsvs_ring_bo);
 	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 }
 
@@ -1209,6 +1383,7 @@ VkResult radv_QueueSubmit(
 	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
 	uint32_t scratch_size = 0;
 	uint32_t compute_scratch_size = 0;
+	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
 	struct radeon_winsys_cs *preamble_cs = NULL;
 	VkResult result;
 
@@ -1222,10 +1397,12 @@ VkResult radv_QueueSubmit(
 			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
 			compute_scratch_size = MAX2(compute_scratch_size,
 			                            cmd_buffer->compute_scratch_size_needed);
+			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
 		}
 	}
 
-	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, &preamble_cs);
+	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
 	if (result != VK_SUCCESS)
 		return result;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3a0318b..57aa9ea 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -470,10 +470,14 @@ struct radv_queue {
 
 	uint32_t scratch_size;
 	uint32_t compute_scratch_size;
+	uint32_t esgs_ring_size;
+	uint32_t gsvs_ring_size;
 
 	struct radeon_winsys_bo *scratch_bo;
 	struct radeon_winsys_bo *descriptor_bo;
 	struct radeon_winsys_bo *compute_scratch_bo;
+	struct radeon_winsys_bo *esgs_ring_bo;
+	struct radeon_winsys_bo *gsvs_ring_bo;
 	struct radeon_winsys_cs *preamble_cs;
 };
 
@@ -742,6 +746,10 @@ struct radv_cmd_buffer {
 
 	uint32_t scratch_size_needed;
 	uint32_t compute_scratch_size_needed;
+	uint32_t esgs_ring_size_needed;
+	uint32_t gsvs_ring_size_needed;
+
+	int ring_offsets_idx; /* just used for verification */
 };
 
 struct radv_image;
-- 
2.9.3



More information about the mesa-dev mailing list