<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body text="#000000" bgcolor="#FFFFFF">
<br>
<br>
<div class="moz-cite-prefix">On 11/6/18 12:39 AM, Jason Ekstrand
wrote:<br>
</div>
<blockquote type="cite"
cite="mid:CAOFGe94f-Or5v4Y+f_ohPEZHOzgP8XSffCV9SBUJjNh6rzuZ=A@mail.gmail.com">
<pre class="moz-quote-pre" wrap="">On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <a class="moz-txt-link-rfc2396E" href="mailto:danylo.piliaiev@gmail.com">&lt;danylo.piliaiev@gmail.com&gt;</a>
wrote:
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">Signed-off-by: Danylo Piliaiev <a class="moz-txt-link-rfc2396E" href="mailto:danylo.piliaiev@globallogic.com">&lt;danylo.piliaiev@globallogic.com&gt;</a>
---
src/intel/vulkan/anv_extensions.py | 1 +
src/intel/vulkan/genX_cmd_buffer.c | 155 +++++++++++++++++++++++++++++
2 files changed, 156 insertions(+)
diff --git a/src/intel/vulkan/anv_extensions.py
b/src/intel/vulkan/anv_extensions.py
index d4915c9501..7f44da6648 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -113,6 +113,7 @@ EXTENSIONS = [
Extension('VK_KHR_xlib_surface', 6,
'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_multiview', 1, True),
Extension('VK_KHR_display', 23,
'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_KHR_draw_indirect_count', 1,
'device->info.gen >= 8 || device->info.is_haswell'),
Extension('VK_EXT_acquire_xlib_display', 1,
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_debug_report', 8, True),
Extension('VK_EXT_direct_mode_display', 1,
'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/genX_cmd_buffer.c
b/src/intel/vulkan/genX_cmd_buffer.c
index 43a02f2256..d7b94efd19 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2982,6 +2982,161 @@ void genX(CmdDrawIndexedIndirect)(
}
}
+#if GEN_IS_HASWELL || GEN_GEN >= 8
+static void
+emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_address count_address,
+ uint32_t draw_index)
+{
+ /* Upload the current draw count from the draw parameters buffer to
+ * MI_PREDICATE_SRC0.
+ */
+ emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(MI_ALU_REG14));
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
Do we also need to set MI_PREDICATE_SRC0 + 4 to 0? I suspect we do.</pre>
</blockquote>
Yes.<br>
I'll also recheck other places.<br>
<blockquote type="cite"
cite="mid:CAOFGe94f-Or5v4Y+f_ohPEZHOzgP8XSffCV9SBUJjNh6rzuZ=A@mail.gmail.com">
<pre class="moz-quote-pre" wrap="">
Also, we can likely save some batch space if we have a "prepare" function
which sets MI_PREDICATE_SRC0, SRC0 + 4, and SRC1 + 4 and only emit one
LOAD_REGISTER_IMM and the MI_PREDICATE per-draw. For lots of primitives,
those extra three MI_LOAD_REGISTER_* calls will add up.
</pre>
</blockquote>
Makes sense
<blockquote type="cite"
cite="mid:CAOFGe94f-Or5v4Y+f_ohPEZHOzgP8XSffCV9SBUJjNh6rzuZ=A@mail.gmail.com">
<pre class="moz-quote-pre" wrap="">
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">+
+ /* Upload the index of the current primitive to MI_PREDICATE_SRC1. */
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index);
+ emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+
+ if (draw_index == 0) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+ mip.LoadOperation = LOAD_LOADINV;
+ mip.CombineOperation = COMBINE_SET;
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;
+ }
+ } else {
+ /* While draw_index < draw_count the predicate's result will be
+ * (draw_index == draw_count) ^ TRUE = TRUE
+ * When draw_index == draw_count the result is
+ * (TRUE) ^ TRUE = FALSE
+ * After this all results will be:
+ * (FALSE) ^ FALSE = FALSE
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+ mip.LoadOperation = LOAD_LOAD;
+ mip.CombineOperation = COMBINE_XOR;
+ mip.CompareOperation = COMPARE_SRCS_EQUAL;
+ }
+ }
+}
+
+void genX(CmdDrawIndirectCountKHR)(
+ VkCommandBuffer commandBuffer,
+ VkBuffer _buffer,
+ VkDeviceSize offset,
+ VkBuffer _countBuffer,
+ VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount,
+ uint32_t stride)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+ ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+ const struct brw_vs_prog_data *vs_prog_data =
get_vs_prog_data(pipeline);
+
+ if (anv_batch_has_error(&cmd_buffer->batch))
+ return;
+
+ genX(cmd_buffer_flush_state)(cmd_buffer);
+
+ struct anv_address count_address =
+ anv_address_add(count_buffer->address, countBufferOffset);
+
+ /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+ * command when loading the values into the predicate source registers.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.PipeControlFlushEnable = true;
+ }
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
Have you seen this be an actual problem? If not, why? A documentation
citation would be nice.
</pre>
</blockquote>
You are right - citation is needed:<br>
<br>
Volume 7: 3D-Media-GPGPU (Skylake), MI_PREDICATE:<br>
<br>
<blockquote>MI_LOAD_REGISTER_MEM commands can be used to load the
MItemp0, MItemp1,<br>
and PredicateData registers prior to MI_PREDICATE.<br>
To ensure the memory sources of the MI_LOAD_REGISTER_MEM commands
are coherent<br>
with previous 3D_PIPECONTROL store-DWord operations, software can
use the<br>
new Pipe Control Flush Enable bit in the PIPE_CONTROL command.<br>
</blockquote>
It looks like memory may not be coherent here unless it is enforced
elsewhere.
<blockquote type="cite"
cite="mid:CAOFGe94f-Or5v4Y+f_ohPEZHOzgP8XSffCV9SBUJjNh6rzuZ=A@mail.gmail.com">
<pre class="moz-quote-pre" wrap="">
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">+
+ emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
+ emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
+
+ for (uint32_t i = 0; i < maxDrawCount; i++) {
+ struct anv_address draw = anv_address_add(buffer->address, offset);
+
+ emit_draw_count_predicate(cmd_buffer, count_address, i);
+
+ if (vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance)
+ emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw,
8));
+ if (vs_prog_data->uses_drawid)
+ emit_draw_index(cmd_buffer, i);
+
+ load_indirect_parameters(cmd_buffer, draw, false);
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+ prim.IndirectParameterEnable = true;
+ prim.PredicateEnable = true;
+ prim.VertexAccessType = SEQUENTIAL;
+ prim.PrimitiveTopologyType = pipeline->topology;
+ }
+
+ offset += stride;
+ }
+}
+
+void genX(CmdDrawIndexedIndirectCountKHR)(
+ VkCommandBuffer commandBuffer,
+ VkBuffer _buffer,
+ VkDeviceSize offset,
+ VkBuffer _countBuffer,
+ VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount,
+ uint32_t stride)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+ ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
+ struct anv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+ const struct brw_vs_prog_data *vs_prog_data =
get_vs_prog_data(pipeline);
+
+ if (anv_batch_has_error(&cmd_buffer->batch))
+ return;
+
+ genX(cmd_buffer_flush_state)(cmd_buffer);
+
+ struct anv_address count_address =
+ anv_address_add(count_buffer->address, countBufferOffset);
+
+ /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+ * command when loading the values into the predicate source registers.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.PipeControlFlushEnable = true;
+ }
+
+ emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
+ emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
+
+ for (uint32_t i = 0; i < maxDrawCount; i++) {
+ struct anv_address draw = anv_address_add(buffer->address, offset);
+
+ emit_draw_count_predicate(cmd_buffer, count_address, i);
+
+ /* TODO: We need to stomp base vertex to 0 somehow */
+ if (vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance)
+ emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw,
12));
+ if (vs_prog_data->uses_drawid)
+ emit_draw_index(cmd_buffer, i);
+
+ load_indirect_parameters(cmd_buffer, draw, true);
+
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+ prim.IndirectParameterEnable = true;
+ prim.PredicateEnable = true;
+ prim.VertexAccessType = RANDOM;
+ prim.PrimitiveTopologyType = pipeline->topology;
+ }
+
+ offset += stride;
+ }
+}
+#endif
+
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
--
2.18.0
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
</pre>
</blockquote>
<br>
</body>
</html>