<div dir="auto"><div><div class="gmail_extra"><div class="gmail_quote">On Feb 9, 2017 8:25 PM, "Dave Airlie" &lt;<a href="mailto:airlied@gmail.com">airlied@gmail.com</a>&gt; wrote:<br type="attribution"><blockquote class="quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Dave Airlie &lt;<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>&gt;<br>
<br>
If a buffer is just full of flushes, we flush things on command<br>
buffer submission anyway, so don't bother submitting these.<br>
<br>
This will reduce some CPU overhead on dota2, which submits a fair<br>
few command streams that don't end up drawing anything.</blockquote></div></div></div><div dir="auto"><br></div><div dir="auto">I wrote basically the same patch for our driver earlier this year when I was preparing for our GDC Dota 2 demo.  I noticed an improvement at the time but I'm pretty sure it was just because of the stalls we had due to relocations.  Now that those stalls are gone, I'm not convinced it would do much.  Did you actually measure a performance improvement or was this just a little CPU usage reduction?</div><div dir="auto"><br></div><div dir="auto"></div><div dir="auto"><div class="gmail_extra"><div class="gmail_quote"><blockquote class="quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Signed-off-by: Dave Airlie &lt;<a href="mailto:airlied@redhat.com">airlied@redhat.com</a>&gt;<br>
---<br>
 src/amd/vulkan/radv_cmd_<wbr>buffer.c  |  3 +++<br>
 src/amd/vulkan/radv_device.c      | 14 +++++++++-----<br>
 src/amd/vulkan/radv_meta_<wbr>buffer.c |  1 +<br>
 src/amd/vulkan/radv_private.h     |  2 ++<br>
 src/amd/vulkan/si_cmd_buffer.c    |  2 +-<br>
 5 files changed, 16 insertions(+), 6 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_cmd_<wbr>buffer.c b/src/amd/vulkan/radv_cmd_<wbr>buffer.c<br>
index f281f33..25b1bd6 100644<br>
--- a/src/amd/vulkan/radv_cmd_<wbr>buffer.c<br>
+++ b/src/amd/vulkan/radv_cmd_<wbr>buffer.c<br>
@@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(<wbr>struct radv_cmd_buffer *cmd_buffer)<br>
        MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer-<wbr>>device->ws,<br>
                                                           cmd_buffer->cs, 4096);<br>
<br>
+       cmd_buffer->no_draws = false;<br>
        if ((cmd_buffer->state.vertex_<wbr>descriptors_dirty || cmd_buffer->state.vb_dirty) &&<br>
            cmd_buffer->state.pipeline-><wbr>num_vertex_attribs) {<br>
                unsigned vb_offset;<br>
@@ -1592,6 +1593,7 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)<br>
        cmd_buffer->record_fail = false;<br>
<br>
        cmd_buffer->ring_offsets_idx = -1;<br>
+       cmd_buffer->no_draws = true;<br>
 }<br>
<br>
 VkResult radv_ResetCommandBuffer(<br>
@@ -2423,6 +2425,7 @@ void radv_<wbr>CmdDrawIndexedIndirectCountAMD<wbr>(<br>
 static void<br>
 radv_flush_compute_state(<wbr>struct radv_cmd_buffer *cmd_buffer)<br>
 {<br>
+       cmd_buffer->no_draws = false;<br>
        radv_emit_compute_pipeline(<wbr>cmd_buffer);<br>
        radv_flush_descriptors(cmd_<wbr>buffer, cmd_buffer->state.compute_<wbr>pipeline,<br>
                               VK_SHADER_STAGE_COMPUTE_BIT);<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index 8a54a2a..fddada4 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -1448,21 +1448,25 @@ VkResult radv_QueueSubmit(<br>
                cs_array = malloc(sizeof(struct radeon_winsys_cs *) *<br>
                                                pSubmits[i].<wbr>commandBufferCount);<br>
<br>
+               int draw_cmds_count = 0;<br>
                for (uint32_t j = 0; j < pSubmits[i].<wbr>commandBufferCount; j++) {<br>
                        RADV_FROM_HANDLE(radv_cmd_<wbr>buffer, cmd_buffer,<br>
                                         pSubmits[i].pCommandBuffers[j]<wbr>);<br>
                        assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_<wbr>PRIMARY);<br>
-<br>
-                       cs_array[j] = cmd_buffer->cs;<br>
+                       if (cmd_buffer->no_draws == true) {<br>
+                               continue;<br>
+                       }<br>
+                       cs_array[draw_cmds_count] = cmd_buffer->cs;<br>
+                       draw_cmds_count++;<br>
                        if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_<wbr>SIMULTANEOUS_USE_BIT))<br>
                                can_patch = false;<br>
                }<br>
<br>
-               for (uint32_t j = 0; j < pSubmits[i].<wbr>commandBufferCount; j += advance) {<br>
+               for (uint32_t j = 0; j < draw_cmds_count; j += advance) {<br>
                        advance = MIN2(max_cs_submission,<br>
-                                      pSubmits[i].commandBufferCount - j);<br>
+                                      draw_cmds_count - j);<br>
                        bool b = j == 0;<br>
-                       bool e = j + advance == pSubmits[i].<wbr>commandBufferCount;<br>
+                       bool e = j + advance == draw_cmds_count;<br>
<br>
                        if (queue->device->trace_bo)<br>
                                *queue->device->trace_id_ptr = 0;<br>
diff --git a/src/amd/vulkan/radv_meta_<wbr>buffer.c b/src/amd/vulkan/radv_meta_<wbr>buffer.c<br>
index cd2973f..4857d3d 100644<br>
--- a/src/amd/vulkan/radv_meta_<wbr>buffer.c<br>
+++ b/src/amd/vulkan/radv_meta_<wbr>buffer.c<br>
@@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer(<br>
        assert(!(dataSize & 3));<br>
        assert(!(va & 3));<br>
<br>
+       cmd_buffer->no_draws = false;<br>
        if (dataSize < 4096) {<br>
                cmd_buffer->device->ws->cs_<wbr>add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);<br>
<br>
diff --git a/src/amd/vulkan/radv_private.<wbr>h b/src/amd/vulkan/radv_private.<wbr>h<br>
index 25ed5de..9a88ce0 100644<br>
--- a/src/amd/vulkan/radv_private.<wbr>h<br>
+++ b/src/amd/vulkan/radv_private.<wbr>h<br>
@@ -750,6 +750,8 @@ struct radv_cmd_buffer {<br>
        uint32_t gsvs_ring_size_needed;<br>
<br>
        int ring_offsets_idx; /* just used for verification */<br>
+<br>
+       bool no_draws;<br>
 };<br>
<br>
 struct radv_image;<br>
diff --git a/src/amd/vulkan/si_cmd_<wbr>buffer.c b/src/amd/vulkan/si_cmd_<wbr>buffer.c<br>
index 1c99b22..b94c1f1 100644<br>
--- a/src/amd/vulkan/si_cmd_<wbr>buffer.c<br>
+++ b/src/amd/vulkan/si_cmd_<wbr>buffer.c<br>
@@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(<wbr>struct radv_cmd_buffer *cmd_buffer,<br>
 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,<br>
                              uint64_t remaining_size, unsigned *flags)<br>
 {<br>
-<br>
+       cmd_buffer->no_draws = false;<br>
        /* Flush the caches for the first copy only.<br>
         * Also wait for the previous CP DMA operations.<br>
         */<br>
<font color="#888888">--<br>
2.7.4<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></blockquote></div><br></div></div></div>