[Mesa-dev] [PATCH 27/31] radv: handle emitting ring size registers at start of cs
Dave Airlie
airlied at gmail.com
Fri Jan 20 03:03:23 UTC 2017
From: Dave Airlie <airlied at redhat.com>
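Work out the maximum size of the ESGS and GSVS rings needed by the
graphics pipelines bound in a command buffer (including any executed
secondary command buffers), allocate the rings when recording ends, and
patch the ring size registers and ring descriptor address into space
reserved at the start of the primary buffer.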
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
src/amd/vulkan/radv_cmd_buffer.c | 189 +++++++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_private.h | 7 ++
2 files changed, 196 insertions(+)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 64c1507..814b12e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1498,6 +1498,21 @@ VkResult radv_BeginCommandBuffer(
default:
break;
}
+
+ uint32_t pad_word = 0xffff1000U;
+ if (cmd_buffer->device->physical_device->rad_info.gfx_ib_pad_with_type2)
+ pad_word = 0x80000000;
+ cmd_buffer->ring_patch_idx = cmd_buffer->cs->cdw;
+ cmd_buffer->cs_to_patch_ring = cmd_buffer->cs->buf;
+ for (unsigned i = 0; i < 8; i++) {
+ radeon_emit(cmd_buffer->cs, pad_word);
+ }
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ radeon_emit(cmd_buffer->cs, pad_word);
+ radeon_emit(cmd_buffer->cs, pad_word);
+ radeon_emit(cmd_buffer->cs, pad_word);
+ radeon_emit(cmd_buffer->cs, pad_word);
+ }
}
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
@@ -1635,6 +1650,171 @@ VkResult radv_EndCommandBuffer(
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
si_emit_cache_flush(cmd_buffer);
+
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
+ (cmd_buffer->esgs_ring_size_needed ||
+ cmd_buffer->gsvs_ring_size_needed)) {
+ uint64_t esgs_va = 0, gsvs_va = 0;
+ uint32_t ring_offset;
+ void *ring_ptr;
+
+ if (cmd_buffer->esgs_ring_size_needed) {
+ cmd_buffer->esgs_ring =
+ cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws,
+ cmd_buffer->esgs_ring_size_needed,
+ 4096,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_CPU_ACCESS);
+ if (!cmd_buffer->esgs_ring) {
+ cmd_buffer->record_fail = true;
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+ cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+ cmd_buffer->esgs_ring, 8);
+ esgs_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->esgs_ring);
+ }
+
+ if (cmd_buffer->gsvs_ring_size_needed) {
+ cmd_buffer->gsvs_ring =
+ cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws,
+ cmd_buffer->gsvs_ring_size_needed,
+ 4096,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_CPU_ACCESS);
+ if (!cmd_buffer->gsvs_ring) {
+ cmd_buffer->record_fail = true;
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+ cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+ cmd_buffer->gsvs_ring, 8);
+ gsvs_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gsvs_ring);
+ }
+
+ /* 4 4-dword buffer descriptors
+ * ES entry for ES->GS ring
+ * GS entry for ES->GS ring
+ * VS entry for GS->VS ring
+ * GS entry for GS->VS ring that gets patched by shader.
+ */
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 4 * 4 * 4, 256, &ring_offset,
+ &ring_ptr);
+ {
+ uint32_t *desc = (uint32_t *)ring_ptr;
+
+ /* ES entry for ES->GS ring */
+ /* stride 0, num records - size, add tid, swizzle, elsize4,
+ index stride 64 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = cmd_buffer->esgs_ring_size_needed;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) |
+ S_008F0C_INDEX_STRIDE(3) |
+ S_008F0C_ADD_TID_ENABLE(true);
+
+ desc += 4;
+ /* GS entry for ES->GS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(false);
+ desc[2] = cmd_buffer->esgs_ring_size_needed;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(0) |
+ S_008F0C_INDEX_STRIDE(0) |
+ S_008F0C_ADD_TID_ENABLE(false);
+
+ desc += 4;
+ /* VS entry for GS->VS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(false);
+ desc[2] = cmd_buffer->gsvs_ring_size_needed;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(0) |
+ S_008F0C_INDEX_STRIDE(0) |
+ S_008F0C_ADD_TID_ENABLE(false);
+ desc += 4;
+
+ /* stride gsvs_itemsize, num records 64
+ elsize 4, index stride 16 */
+ /* shader will patch stride and desc[2] */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+ S_008F04_STRIDE(0) |
+ S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = 0;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1) |
+ S_008F0C_INDEX_STRIDE(1) |
+ S_008F0C_ADD_TID_ENABLE(true);
+ }
+ int idx = cmd_buffer->ring_patch_idx;
+
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_UCONFIG_REG, 2, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = (R_030900_VGT_ESGS_RING_SIZE - CIK_UCONFIG_REG_OFFSET) >> 2;
+ cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->esgs_ring_size_needed >> 8;
+ cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->gsvs_ring_size_needed >> 8;
+ } else {
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_CONFIG_REG, 2, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = (R_0088C8_VGT_ESGS_RING_SIZE - R600_CONFIG_REG_OFFSET) >> 2;
+ cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->esgs_ring_size_needed >> 8;
+ cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->gsvs_ring_size_needed >> 8;
+ }
+
+ uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo) + ring_offset;
+ uint32_t reg_base = R_00B130_SPI_SHADER_USER_DATA_VS_0 + (2 * 4);
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2;
+ cmd_buffer->cs_to_patch_ring[idx++] = va;
+ cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+
+ reg_base = R_00B230_SPI_SHADER_USER_DATA_GS_0 + (2 * 4);
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2;
+ cmd_buffer->cs_to_patch_ring[idx++] = va;
+ cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+
+ reg_base = R_00B330_SPI_SHADER_USER_DATA_ES_0 + (2 * 4);
+ cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0);
+ cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2;
+ cmd_buffer->cs_to_patch_ring[idx++] = va;
+ cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+ }
+
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
cmd_buffer->record_fail)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1715,6 +1895,11 @@ void radv_CmdBindPipeline(
radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
&pipeline->dynamic_state,
pipeline->dynamic_state_mask);
+
+ if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+ cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+ if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+ cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
break;
default:
assert(!"invalid bind point");
@@ -1862,6 +2047,10 @@ void radv_CmdExecuteCommands(
for (uint32_t i = 0; i < commandBufferCount; i++) {
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
+ if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+ primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+ if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+ primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index aa6e09a..e4423fa 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -728,6 +728,13 @@ struct radv_cmd_buffer {
struct radv_cmd_buffer_upload upload;
bool record_fail;
+
+ uint32_t ring_patch_idx;
+ uint32_t *cs_to_patch_ring;
+ struct radeon_winsys_bo *esgs_ring;
+ struct radeon_winsys_bo *gsvs_ring;
+ uint32_t esgs_ring_size_needed;
+ uint32_t gsvs_ring_size_needed;
};
struct radv_image;
--
2.9.3
More information about the mesa-dev
mailing list