<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <<a href="mailto:danylo.piliaiev@gmail.com">danylo.piliaiev@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Conditional rendering affects the following functions:<br>
- vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect<br>
- vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR<br>
- vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase<br>
- vkCmdClearAttachments<br>
<br>
To reduce memory reads, the result of the condition is calculated<br>
once and stored in the designated register MI_ALU_REG15.<br>
<br>
In the current implementation, the affected functions expect MI_PREDICATE_RESULT<br>
to be set before they are called, so any code which changes the predicate<br>
should restore it with restore_conditional_render_predicate.<br>
An alternative is to restore MI_PREDICATE_RESULT in all affected<br>
functions at their beginning.<br>
<br>
Signed-off-by: Danylo Piliaiev <<a href="mailto:danylo.piliaiev@globallogic.com" target="_blank">danylo.piliaiev@globallogic.com</a>><br>
---<br>
src/intel/vulkan/anv_blorp.c | 7 +-<br>
src/intel/vulkan/anv_device.c | 12 ++<br>
src/intel/vulkan/anv_extensions.py | 1 +<br>
src/intel/vulkan/anv_private.h | 2 +<br>
src/intel/vulkan/genX_cmd_buffer.c | 192 ++++++++++++++++++++++++++++-<br>
5 files changed, 209 insertions(+), 5 deletions(-)<br>
<br>
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c<br>
index 478b8e7a3d..157875d16f 100644<br>
--- a/src/intel/vulkan/anv_blorp.c<br>
+++ b/src/intel/vulkan/anv_blorp.c<br>
@@ -1144,8 +1144,11 @@ void anv_CmdClearAttachments(<br>
* trash our depth and stencil buffers.<br>
*/<br>
struct blorp_batch batch;<br>
- blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,<br>
- BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);<br>
+ enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;<br>
+ if (cmd_buffer->state.conditional_render_enabled) {<br>
+ flags |= BLORP_BATCH_PREDICATE_ENABLE;<br>
+ }<br>
+ blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);<br>
<br>
for (uint32_t a = 0; a < attachmentCount; ++a) {<br>
if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {<br>
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c<br>
index a2551452eb..930a192c25 100644<br>
--- a/src/intel/vulkan/anv_device.c<br>
+++ b/src/intel/vulkan/anv_device.c<br>
@@ -957,6 +957,18 @@ void anv_GetPhysicalDeviceFeatures2(<br>
break;<br>
}<br>
<br>
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {<br>
+ VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =<br>
+ (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;<br>
+ ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);<br>
+<br>
+ features->conditionalRendering = pdevice->info.gen >= 8 ||<br>
+ pdevice->info.is_haswell;<br>
+ features->inheritedConditionalRendering = pdevice->info.gen >= 8 ||<br>
+ pdevice->info.is_haswell;<br>
+ break;<br>
+ }<br>
+<br>
default:<br>
anv_debug_ignored_stype(ext->sType);<br>
break;<br>
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py<br>
index c13ce531ee..2ef7a52d01 100644<br>
--- a/src/intel/vulkan/anv_extensions.py<br>
+++ b/src/intel/vulkan/anv_extensions.py<br>
@@ -127,6 +127,7 @@ EXTENSIONS = [<br>
Extension('VK_EXT_vertex_attribute_divisor', 3, True),<br>
Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'),<br>
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'),<br>
+ Extension('VK_EXT_conditional_rendering', 1, 'device->info.gen >= 8 || device->info.is_haswell'),<br>
]<br>
<br>
class VkVersion:<br>
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h<br>
index 599b903f25..108da51a59 100644<br>
--- a/src/intel/vulkan/anv_private.h<br>
+++ b/src/intel/vulkan/anv_private.h<br>
@@ -2032,6 +2032,8 @@ struct anv_cmd_state {<br>
*/<br>
bool hiz_enabled;<br>
<br>
+ bool conditional_render_enabled;<br>
+<br>
/**<br>
* Array length is anv_cmd_state::pass::attachment_count. Array content is<br>
* valid only when recording a render pass instance.<br>
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c<br>
index f07a6aa7c9..87abc443b6 100644<br>
--- a/src/intel/vulkan/genX_cmd_buffer.c<br>
+++ b/src/intel/vulkan/genX_cmd_buffer.c<br>
@@ -479,8 +479,9 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,<br>
0, 0, 1, hiz_op);<br>
}<br>
<br>
-#define MI_PREDICATE_SRC0 0x2400<br>
-#define MI_PREDICATE_SRC1 0x2408<br>
+#define MI_PREDICATE_SRC0 0x2400<br>
+#define MI_PREDICATE_SRC1 0x2408<br>
+#define MI_PREDICATE_RESULT 0x2418<br>
<br>
static void<br>
set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer,<br>
@@ -545,6 +546,14 @@ mi_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)<br>
<br>
#define CS_GPR(n) (0x2600 + (n) * 8)<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+static void<br>
+restore_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)<br>
+{<br>
+ emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(MI_ALU_REG15));<br></blockquote><div><br></div><div>Does this work? Is it sufficient to just set MI_PREDICATE_RESULT or do we actually need to use an MI_PREDICATE? I genuinely don't know and this strikes me as odd.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+#endif<br>
+<br>
/* This is only really practical on haswell and above because it requires<br>
* MI math in order to get it correct.<br>
*/<br>
@@ -1144,6 +1153,12 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,<br>
}<br>
}<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_buffer->state.conditional_render_enabled) {<br>
+ restore_conditional_render_predicate(cmd_buffer);<br>
+ }<br>
+#endif<br>
+<br>
cmd_buffer->state.pending_pipe_bits |=<br>
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;<br>
}<br>
@@ -1397,6 +1412,26 @@ genX(BeginCommandBuffer)(<br>
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;<br>
}<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {<br>
+ vk_foreach_struct_const(s, pBeginInfo->pInheritanceInfo->pNext) {<br>
+ switch (s->sType) {<br>
+ case VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT: {<br>
+ const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =<br>
+ (const VkCommandBufferInheritanceConditionalRenderingInfoEXT *) s;<br>
+ /* We should emit commands as if conditional render is enabled. */<br>
+ cmd_buffer->state.conditional_render_enabled =<br>
+ conditional_rendering_info->conditionalRenderingEnable;<br></blockquote><div><br></div><div>Might be easier to just use vk_find_struct_const() instead of the loop.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ break;<br>
+ }<br>
+ default:<br>
+ anv_debug_ignored_stype(s->sType);<br>
+ break;<br>
+ }<br>
+ }<br>
+ }<br>
+#endif<br>
+<br>
return result;<br>
}<br>
<br>
@@ -1501,6 +1536,20 @@ genX(CmdExecuteCommands)(<br>
assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);<br>
assert(!anv_batch_has_error(&secondary->batch));<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (secondary->state.conditional_render_enabled) {<br>
+ /* Secondary buffer is constructed as if it will be executed<br>
+ * with conditional rendering, we should satisfy this dependency<br>
+ * regardless of conditional rendering being enabled in primary.<br>
+ */<br>
+ if (!primary->state.conditional_render_enabled) {<br>
+ emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15), 1);<br>
+ emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15) + 4, 0);<br>
+ emit_lrr(&primary->batch, MI_PREDICATE_RESULT, CS_GPR(MI_ALU_REG15));<br>
+ }<br>
+ }<br>
+#endif<br>
+<br>
if (secondary->usage_flags &<br>
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {<br>
/* If we're continuing a render pass from the primary, we need to<br>
@@ -2761,6 +2810,7 @@ void genX(CmdDraw)(<br>
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);<br>
<br>
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {<br>
+ prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;<br>
prim.VertexAccessType = SEQUENTIAL;<br>
prim.PrimitiveTopologyType = pipeline->topology;<br>
prim.VertexCountPerInstance = vertexCount;<br>
@@ -2800,6 +2850,7 @@ void genX(CmdDrawIndexed)(<br>
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);<br>
<br>
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {<br>
+ prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;<br>
prim.VertexAccessType = RANDOM;<br>
prim.PrimitiveTopologyType = pipeline->topology;<br>
prim.VertexCountPerInstance = indexCount;<br>
@@ -2935,6 +2986,7 @@ void genX(CmdDrawIndirect)(<br>
<br>
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {<br>
prim.IndirectParameterEnable = true;<br>
+ prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;<br>
prim.VertexAccessType = SEQUENTIAL;<br>
prim.PrimitiveTopologyType = pipeline->topology;<br>
}<br>
@@ -2974,6 +3026,7 @@ void genX(CmdDrawIndexedIndirect)(<br>
<br>
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {<br>
prim.IndirectParameterEnable = true;<br>
+ prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;<br>
prim.VertexAccessType = RANDOM;<br>
prim.PrimitiveTopologyType = pipeline->topology;<br>
}<br>
@@ -3024,6 +3077,42 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,<br>
}<br>
}<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+static void<br>
+emit_draw_count_predicate_with_conditional_render(<br>
+ struct anv_cmd_buffer *cmd_buffer,<br>
+ struct anv_address count_address,<br>
+ uint32_t draw_index)<br>
+{<br>
+ const int draw_index_reg = MI_ALU_REG0;<br>
+ const int draw_count_reg = MI_ALU_REG14;<br>
+ const int condition_reg = MI_ALU_REG15;<br>
+ const int tmp_result_reg = MI_ALU_REG1;<br>
+<br>
+ emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index);<br>
+ emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0);<br>
+<br>
+ uint32_t *dw;<br>
+ /* Compute (draw_index < draw_count).<br>
+ * We do this by subtracting and storing the carry bit.<br>
+ */<br>
+ dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));<br>
+ dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg);<br>
+ dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, draw_count_reg);<br>
+ dw[3] = mi_alu(MI_ALU_SUB, 0, 0);<br>
+ dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF);<br>
+<br>
+ /* & condition */<br>
+ dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));<br>
+ dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg);<br>
+ dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, condition_reg);<br>
+ dw[3] = mi_alu(MI_ALU_AND, 0, 0);<br>
+ dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU);<br>
+<br>
+ emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg));<br></blockquote><div><br></div><div>Again, is this sufficient? Maybe I'm missing something.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+#endif<br>
+<br>
void genX(CmdDrawIndirectCountKHR)(<br>
VkCommandBuffer commandBuffer,<br>
VkBuffer _buffer,<br>
@@ -3063,7 +3152,15 @@ void genX(CmdDrawIndirectCountKHR)(<br>
for (uint32_t i = 0; i < maxDrawCount; i++) {<br>
struct anv_address draw = anv_address_add(buffer->address, offset);<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_state->conditional_render_enabled) {<br>
+ emit_draw_count_predicate_with_conditional_render(cmd_buffer, count_address, i);<br>
+ } else {<br>
+ emit_draw_count_predicate(cmd_buffer, count_address, i);<br>
+ }<br>
+#else<br>
emit_draw_count_predicate(cmd_buffer, count_address, i);<br>
+#endif<br>
<br>
if (vs_prog_data->uses_firstvertex ||<br>
vs_prog_data->uses_baseinstance)<br>
@@ -3082,6 +3179,12 @@ void genX(CmdDrawIndirectCountKHR)(<br>
<br>
offset += stride;<br>
}<br>
+<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_state->conditional_render_enabled) {<br>
+ restore_conditional_render_predicate(cmd_buffer);<br>
+ }<br>
+#endif<br>
}<br>
<br>
void genX(CmdDrawIndexedIndirectCountKHR)(<br>
@@ -3123,7 +3226,15 @@ void genX(CmdDrawIndexedIndirectCountKHR)(<br>
for (uint32_t i = 0; i < maxDrawCount; i++) {<br>
struct anv_address draw = anv_address_add(buffer->address, offset);<br>
<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_state->conditional_render_enabled) {<br>
+ emit_draw_count_predicate_with_conditional_render(cmd_buffer, count_address, i);<br>
+ } else {<br>
+ emit_draw_count_predicate(cmd_buffer, count_address, i);<br>
+ }<br>
+#else<br>
emit_draw_count_predicate(cmd_buffer, count_address, i);<br>
+#endif<br>
<br>
/* TODO: We need to stomp base vertex to 0 somehow */<br>
if (vs_prog_data->uses_firstvertex ||<br>
@@ -3143,6 +3254,12 @@ void genX(CmdDrawIndexedIndirectCountKHR)(<br>
<br>
offset += stride;<br>
}<br>
+<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+ if (cmd_state->conditional_render_enabled) {<br>
+ restore_conditional_render_predicate(cmd_buffer);<br>
+ }<br>
+#endif<br>
}<br>
<br>
static VkResult<br>
@@ -3351,6 +3468,7 @@ void genX(CmdDispatchBase)(<br>
genX(cmd_buffer_flush_compute_state)(cmd_buffer);<br>
<br>
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {<br>
+ ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;<br>
ggw.SIMDSize = prog_data->simd_size / 16;<br>
ggw.ThreadDepthCounterMaximum = 0;<br>
ggw.ThreadHeightCounterMaximum = 0;<br>
@@ -3448,7 +3566,8 @@ void genX(CmdDispatchIndirect)(<br>
<br>
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {<br>
ggw.IndirectParameterEnable = true;<br>
- ggw.PredicateEnable = GEN_GEN <= 7;<br>
+ ggw.PredicateEnable = GEN_GEN <= 7 ||<br>
+ cmd_buffer->state.conditional_render_enabled;<br>
ggw.SIMDSize = prog_data->simd_size / 16;<br>
ggw.ThreadDepthCounterMaximum = 0;<br>
ggw.ThreadHeightCounterMaximum = 0;<br>
@@ -4158,3 +4277,70 @@ void genX(CmdEndRenderPass2KHR)(<br>
{<br>
genX(CmdEndRenderPass)(commandBuffer);<br>
}<br>
+<br>
+#if GEN_GEN >= 8 || GEN_IS_HASWELL<br>
+void genX(CmdBeginConditionalRenderingEXT)(<br>
+ VkCommandBuffer commandBuffer,<br>
+ const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)<br>
+{<br>
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);<br>
+ ANV_FROM_HANDLE(anv_buffer, buffer, pConditionalRenderingBegin->buffer);<br>
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;<br>
+ struct anv_address value_address =<br>
+ anv_address_add(buffer->address, pConditionalRenderingBegin->offset);<br>
+<br>
+ const bool inverted = pConditionalRenderingBegin->flags &<br>
+ VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;<br>
+<br>
+ cmd_state->conditional_render_enabled = true;<br>
+<br>
+ /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM<br>
+ * command when loading the values into the predicate source registers.<br>
+ */<br>
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {<br>
+ pc.PipeControlFlushEnable = true;<br>
+ }<br>
+<br>
+ /* Section 19.4 of the Vulkan 1.1.85 spec says:<br>
+ *<br>
+ * If the value of the predicate in buffer memory changes<br>
+ * while conditional rendering is active, the rendering commands<br>
+ * may be discarded in an implementation-dependent way.<br>
+ * Some implementations may latch the value of the predicate<br>
+ * upon beginning conditional rendering while others<br>
+ * may read it before every rendering command.<br>
+ *<br>
+ * So it's perfectly fine to read a value from the buffer once.<br>
+ */<br>
+<br>
+ emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, value_address);<br>
+ /* Zero the top 32-bits of MI_PREDICATE_SRC0 */<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);<br>
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);<br>
+<br>
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {<br>
+ mip.LoadOperation = inverted ? LOAD_LOAD : LOAD_LOADINV;<br>
+ mip.CombineOperation = COMBINE_SET;<br>
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;<br>
+ }<br>
+<br>
+ /* Calculate predicate result once and store it in MI_ALU_REG15<br>
+ * to prevent recalculating it when interacting with<br>
+ * VK_KHR_draw_indirect_count which also uses predicates.<br>
+ * It is also the only way to support conditional render of<br>
+ * secondary buffers because they are formed before we<br>
+ * know whether conditional render is enabled.<br>
+ */<br>
+ emit_lrr(&cmd_buffer->batch, CS_GPR(MI_ALU_REG15), MI_PREDICATE_RESULT);<br>
+}<br>
+<br>
+void genX(CmdEndConditionalRenderingEXT)(<br>
+ VkCommandBuffer commandBuffer)<br>
+{<br>
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);<br>
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;<br>
+<br>
+ cmd_state->conditional_render_enabled = false;<br>
+}<br>
+#endif<br>
-- <br>
2.18.0<br>
<br>
</blockquote></div></div>