Mesa (master): turnip: implement VK_KHR_shader_draw_parameters
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Jun 25 16:18:58 UTC 2020
Module: Mesa
Branch: master
Commit: 62de79ac4492ac9e4af99b9a25c15cda1114e7d9
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62de79ac4492ac9e4af99b9a25c15cda1114e7d9
Author: Jonathan Marek <jonathan at marek.ca>
Date: Wed Jun 24 16:00:30 2020 -0400
turnip: implement VK_KHR_shader_draw_parameters
Note: going by the blob, VFD_INDEX_OFFSET/VFD_INSTANCE_START_OFFSET seem
completely unused by indirect draws, so this changes them to only be set
for non-indirect draws (and moves them to the vs_params draw state).
Passes dEQP-VK.draw.shader_draw_parameters.*
Signed-off-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5635>
---
src/freedreno/vulkan/tu_cmd_buffer.c | 184 +++++++++++++++++++++-------------
src/freedreno/vulkan/tu_device.c | 22 +++-
src/freedreno/vulkan/tu_extensions.py | 1 +
src/freedreno/vulkan/tu_private.h | 2 +
src/freedreno/vulkan/tu_shader.c | 1 +
5 files changed, 136 insertions(+), 74 deletions(-)
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 4441e5f725a..e59cabb9756 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -919,6 +919,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_GS_SIV_CNTL, 0x0000ffff);
+ /* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
@@ -2973,46 +2974,6 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
return tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
}
-static VkResult
-tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
- uint32_t first_instance,
- struct tu_cs_entry *entry)
-{
- /* TODO: fill out more than just base instance */
- const struct tu_program_descriptor_linkage *link =
- &cmd->state.pipeline->program.link[MESA_SHADER_VERTEX];
- const struct ir3_const_state *const_state = &link->const_state;
- struct tu_cs cs;
-
- if (const_state->offsets.driver_param >= link->constlen) {
- *entry = (struct tu_cs_entry) {};
- return VK_SUCCESS;
- }
-
- VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 8, &cs);
- if (result != VK_SUCCESS)
- return result;
-
- tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
- tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(const_state->offsets.driver_param) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
- CP_LOAD_STATE6_0_NUM_UNIT(1));
- tu_cs_emit(&cs, 0);
- tu_cs_emit(&cs, 0);
-
- STATIC_ASSERT(IR3_DP_INSTID_BASE == 2);
-
- tu_cs_emit(&cs, 0);
- tu_cs_emit(&cs, 0);
- tu_cs_emit(&cs, first_instance);
- tu_cs_emit(&cs, 0);
-
- *entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
- return VK_SUCCESS;
-}
-
static struct tu_cs_entry
tu6_emit_vertex_buffers(struct tu_cmd_buffer *cmd,
const struct tu_pipeline *pipeline)
@@ -3156,9 +3117,7 @@ static VkResult
tu6_draw_common(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
bool indexed,
- uint32_t vertex_offset,
- uint32_t first_instance,
- /* note: draw_count count is 0 for indirect */
+ /* note: draw_count is 0 for indirect */
uint32_t draw_count)
{
const struct tu_pipeline *pipeline = cmd->state.pipeline;
@@ -3171,10 +3130,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
/* TODO lrz */
- tu_cs_emit_regs(cs,
- A6XX_VFD_INDEX_OFFSET(vertex_offset),
- A6XX_VFD_INSTANCE_START_OFFSET(first_instance));
-
tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0(
.primitive_restart =
pipeline->ia.primitive_restart && indexed,
@@ -3225,11 +3180,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
- struct tu_cs_entry vs_params;
- result = tu6_emit_vs_params(cmd, first_instance, &vs_params);
- if (result != VK_SUCCESS)
- return result;
-
bool has_tess =
pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
struct tu_cs_entry tess_consts = {};
@@ -3269,7 +3219,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
- tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
+ tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) {
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i,
@@ -3306,7 +3256,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
- tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
+ tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
}
tu_cs_sanity_check(cs);
@@ -3352,6 +3302,68 @@ tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel)
return initiator;
}
+
+static uint32_t
+vs_params_offset(struct tu_cmd_buffer *cmd)
+{
+ const struct tu_program_descriptor_linkage *link =
+ &cmd->state.pipeline->program.link[MESA_SHADER_VERTEX];
+ const struct ir3_const_state *const_state = &link->const_state;
+
+ if (const_state->offsets.driver_param >= link->constlen)
+ return 0;
+
+ /* this layout is required by CP_DRAW_INDIRECT_MULTI */
+ STATIC_ASSERT(IR3_DP_DRAWID == 0);
+ STATIC_ASSERT(IR3_DP_VTXID_BASE == 1);
+ STATIC_ASSERT(IR3_DP_INSTID_BASE == 2);
+
+ /* 0 means disabled for CP_DRAW_INDIRECT_MULTI */
+ assert(const_state->offsets.driver_param != 0);
+
+ return const_state->offsets.driver_param;
+}
+
+static struct tu_draw_state
+tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
+ uint32_t vertex_offset,
+ uint32_t first_instance)
+{
+ uint32_t offset = vs_params_offset(cmd);
+
+ struct tu_cs cs;
+ VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
+ if (result != VK_SUCCESS) {
+ cmd->record_result = result;
+ return (struct tu_draw_state) {};
+ }
+
+ /* TODO: don't make a new draw state when it doesn't change */
+
+ tu_cs_emit_regs(&cs,
+ A6XX_VFD_INDEX_OFFSET(vertex_offset),
+ A6XX_VFD_INSTANCE_START_OFFSET(first_instance));
+
+ if (offset) {
+ tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
+ tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit(&cs, 0);
+ tu_cs_emit(&cs, 0);
+
+ tu_cs_emit(&cs, 0);
+ tu_cs_emit(&cs, vertex_offset);
+ tu_cs_emit(&cs, first_instance);
+ tu_cs_emit(&cs, 0);
+ }
+
+ struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
+ return (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
+}
+
void
tu_CmdDraw(VkCommandBuffer commandBuffer,
uint32_t vertexCount,
@@ -3362,7 +3374,9 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *cs = &cmd->draw_cs;
- tu6_draw_common(cmd, cs, false, firstVertex, firstInstance, vertexCount);
+ cmd->state.vs_params = tu6_emit_vs_params(cmd, firstVertex, firstInstance);
+
+ tu6_draw_common(cmd, cs, false, vertexCount);
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
@@ -3381,7 +3395,9 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *cs = &cmd->draw_cs;
- tu6_draw_common(cmd, cs, true, vertexOffset, firstInstance, indexCount);
+ cmd->state.vs_params = tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
+
+ tu6_draw_common(cmd, cs, true, indexCount);
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
@@ -3403,13 +3419,25 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_buffer, buf, _buffer);
struct tu_cs *cs = &cmd->draw_cs;
- tu6_draw_common(cmd, cs, false, 0, 0, 0);
+ cmd->state.vs_params = (struct tu_draw_state) {};
- for (uint32_t i = 0; i < drawCount; i++) {
- tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT, 3);
- tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
- tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset + stride * i);
- }
+ tu6_draw_common(cmd, cs, false, 0);
+
+ /* workaround for a firmware bug with CP_DRAW_INDIRECT_MULTI, where it
+ * doesn't wait for WFIs to be completed and leads to GPU fault/hang
+ * TODO: this could be worked around in a more performant way,
+ * or there may exist newer firmware that has been fixed
+ */
+ if (cmd->device->physical_device->gpu_id != 650)
+ tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
+
+ tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 6);
+ tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
+ tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL) |
+ A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
+ tu_cs_emit(cs, drawCount);
+ tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset);
+ tu_cs_emit(cs, stride);
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
}
@@ -3425,15 +3453,27 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_buffer, buf, _buffer);
struct tu_cs *cs = &cmd->draw_cs;
- tu6_draw_common(cmd, cs, true, 0, 0, 0);
+ cmd->state.vs_params = (struct tu_draw_state) {};
- for (uint32_t i = 0; i < drawCount; i++) {
- tu_cs_emit_pkt7(cs, CP_DRAW_INDX_INDIRECT, 6);
- tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
- tu_cs_emit_qw(cs, cmd->state.index_va);
- tu_cs_emit(cs, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(cmd->state.max_index_count));
- tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset + stride * i);
- }
+ tu6_draw_common(cmd, cs, true, 0);
+
+ /* workaround for a firmware bug with CP_DRAW_INDIRECT_MULTI, where it
+ * doesn't wait for WFIs to be completed and leads to GPU fault/hang
+ * TODO: this could be worked around in a more performant way,
+ * or there may exist newer firmware that has been fixed
+ */
+ if (cmd->device->physical_device->gpu_id != 650)
+ tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
+
+ tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 9);
+ tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
+ tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED) |
+ A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
+ tu_cs_emit(cs, drawCount);
+ tu_cs_emit_qw(cs, cmd->state.index_va);
+ tu_cs_emit(cs, cmd->state.max_index_count);
+ tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset);
+ tu_cs_emit(cs, stride);
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
}
@@ -3450,7 +3490,9 @@ void tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_buffer, buf, _counterBuffer);
struct tu_cs *cs = &cmd->draw_cs;
- tu6_draw_common(cmd, cs, false, 0, firstInstance, 0);
+ cmd->state.vs_params = tu6_emit_vs_params(cmd, 0, firstInstance);
+
+ tu6_draw_common(cmd, cs, false, 0);
tu_cs_emit_pkt7(cs, CP_DRAW_AUTO, 6);
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_XFB));
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 225cfee3765..994b8a22331 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -590,8 +590,8 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.sampleRateShading = true,
.dualSrcBlend = true,
.logicOp = true,
- .multiDrawIndirect = false,
- .drawIndirectFirstInstance = false,
+ .multiDrawIndirect = true,
+ .drawIndirectFirstInstance = true,
.depthClamp = true,
.depthBiasClamp = false,
.fillModeNonSolid = false,
@@ -636,6 +636,22 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
vk_foreach_struct(ext, pFeatures->pNext)
{
switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES:
+ *((VkPhysicalDeviceVulkan11Features*) ext) = (VkPhysicalDeviceVulkan11Features) {
+ .storageBuffer16BitAccess = false,
+ .uniformAndStorageBuffer16BitAccess = false,
+ .storagePushConstant16 = false,
+ .storageInputOutput16 = false,
+ .multiview = false,
+ .multiviewGeometryShader = false,
+ .multiviewTessellationShader = false,
+ .variablePointersStorageBuffer = false,
+ .variablePointers = false,
+ .protectedMemory = false,
+ .samplerYcbcrConversion = true,
+ .shaderDrawParameters = true,
+ };
+ break;
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext;
features->variablePointersStorageBuffer = false;
@@ -653,7 +669,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
VkPhysicalDeviceShaderDrawParametersFeatures *features =
(VkPhysicalDeviceShaderDrawParametersFeatures *) ext;
- features->shaderDrawParameters = false;
+ features->shaderDrawParameters = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index 9a14bb6c89f..ab47577150c 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -81,6 +81,7 @@ EXTENSIONS = [
Extension('VK_EXT_filter_cubic', 1, 'device->gpu_id == 650'),
Extension('VK_EXT_index_type_uint8', 1, True),
Extension('VK_EXT_vertex_attribute_divisor', 1, True),
+ Extension('VK_KHR_shader_draw_parameters', 1, True),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 5392f3f2726..2bfd5eb18f4 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -815,6 +815,8 @@ struct tu_cmd_state
struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib;
+ struct tu_draw_state vs_params;
+
/* Index buffer */
uint64_t index_va;
uint32_t max_index_count;
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index b25a959b89e..930d10b6985 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -48,6 +48,7 @@ tu_spirv_to_nir(struct ir3_compiler *compiler,
.caps = {
.transform_feedback = true,
.tessellation = true,
+ .draw_parameters = true,
},
};
const nir_shader_compiler_options *nir_options =
More information about the mesa-commit
mailing list