Mesa (main): turnip: Allow load/store skipping in vkCmdClearAttachments().

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 7 00:23:17 UTC 2022


Module: Mesa
Branch: main
Commit: a92fad45e9580bacc070843b158bc1ac6f7ce647
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a92fad45e9580bacc070843b158bc1ac6f7ce647

Author: Emma Anholt <emma at anholt.net>
Date:   Wed Jun  1 15:01:44 2022 -0700

turnip: Allow load/store skipping in vkCmdClearAttachments().

We have to use a 3D draw to make this possible (so that the clear goes
through the binner's visibility calculations). Hopefully the increased
overhead for apps whose rendering can't be skipped is balanced by the
load/store skipping gained in others.

The real motivation is to get draw-time state out of tile load setup.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16826>

---

 src/freedreno/vulkan/tu_clear_blit.c | 35 +++++++++++++++++++++++++----------
 src/freedreno/vulkan/tu_cmd_buffer.c | 29 -----------------------------
 src/freedreno/vulkan/tu_pass.c       |  5 +++++
 src/freedreno/vulkan/tu_private.h    |  6 ++++--
 4 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 93a5a3fcc3f..645b34dc779 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -2312,8 +2312,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
             s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
          }
       }
-
-      cmd->state.attachment_cmd_clear[a] = true;
    }
 
    /* We may not know the multisample count if there are no attachments, so
@@ -2587,8 +2585,6 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
          if (a == VK_ATTACHMENT_UNUSED)
                continue;
 
-         cmd->state.attachment_cmd_clear[a] = true;
-
          tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
                                        &attachments[j].clearValue);
       }
@@ -2627,6 +2623,29 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
       return;
    }
 
+   /* If tile loads/stores for an attachment may be skipped when no draw
+    * intersects the tile at binning time, emit the clear as a 3D draw so that
+    * it contributes to that visibility.
+    */
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   for (uint32_t i = 0; i < attachmentCount; i++) {
+      uint32_t a;
+      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+         uint32_t c = pAttachments[i].colorAttachment;
+         a = subpass->color_attachments[c].attachment;
+      } else {
+         a = subpass->depth_stencil_attachment.attachment;
+      }
+      if (a != VK_ATTACHMENT_UNUSED) {
+         const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a];
+         if (att->cond_load_allowed || att->cond_store_allowed) {
+            tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
+            return;
+         }
+      }
+   }
+
+   /* Otherwise, emit 2D blits for gmem rendering. */
    tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
    tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
    tu_cond_exec_end(cs);
@@ -2905,10 +2924,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
     * To simplify conditions treat partially cleared separate DS as fully
     * cleared and don't emit cond_exec.
     */
-   bool cond_exec = cond_exec_allowed &&
-                    !attachment->clear_mask &&
-                    !cmd->state.attachment_cmd_clear[a] &&
-                    !attachment->will_be_resolved;
+   bool cond_exec = cond_exec_allowed && attachment->cond_load_allowed;
    if (cond_exec)
       tu_begin_load_store_cond_exec(cmd, cs, true);
 
@@ -3035,11 +3051,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
    if (!dst->store && !dst->store_stencil)
       return;
 
-   bool was_cleared = src->clear_mask || cmd->state.attachment_cmd_clear[a];
    /* Unconditional store should happen only if attachment was cleared,
     * which could have happened either by load_op or via vkCmdClearAttachments.
     */
-   bool cond_exec = cond_exec_allowed && !was_cleared;
+   bool cond_exec = cond_exec_allowed && src->cond_store_allowed;
    if (cond_exec) {
       tu_begin_load_store_cond_exec(cmd, cs, false);
    }
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index ecd684a6116..8945bbf7681 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -1540,8 +1540,6 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
    tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
    tu_cs_finish(&cmd_buffer->sub_cs);
 
-   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
-
    u_trace_fini(&cmd_buffer->trace);
 
    tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
@@ -1571,9 +1569,6 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
    tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
    tu_cs_reset(&cmd_buffer->sub_cs);
 
-   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
-   cmd_buffer->state.attachment_cmd_clear = NULL;
-
    tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
 
    for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
@@ -1743,14 +1738,6 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
          cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
          cmd_buffer->state.subpass =
             &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
-         /* vkCmdClearAttachments is allowed in a secondary cmdbuf and we have to
-          * track it as in primary cmdbuf.
-          */
-         cmd_buffer->state.attachment_cmd_clear =
-            vk_zalloc(&cmd_buffer->pool->vk.alloc,
-                      cmd_buffer->state.pass->attachment_count *
-                         sizeof(cmd_buffer->state.attachment_cmd_clear[0]),
-                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
       } else {
          /* When executing in the middle of another command buffer, the CCU
           * state is unknown.
@@ -3277,10 +3264,6 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
          cmd->state.draw_cs_writes_to_cond_pred |=
             secondary->state.draw_cs_writes_to_cond_pred;
 
-         for (uint32_t i = 0; i < cmd->state.pass->attachment_count; i++) {
-            cmd->state.attachment_cmd_clear[i] |=
-               secondary->state.attachment_cmd_clear[i];
-         }
       } else {
          assert(tu_cs_is_empty(&secondary->draw_cs));
          assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
@@ -3477,16 +3460,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
       return;
    }
 
-   cmd->state.attachment_cmd_clear =
-      vk_zalloc(&cmd->pool->vk.alloc, pass->attachment_count *
-               sizeof(cmd->state.attachment_cmd_clear[0]), 8,
-               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
-   if (!cmd->state.attachment_cmd_clear) {
-      cmd->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      return;
-   }
-
    cmd->state.draw_cs_writes_to_cond_pred = false;
 
    for (unsigned i = 0; i < pass->attachment_count; i++) {
@@ -4827,8 +4800,6 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
    tu_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier, true);
 
    vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
-   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
-   cmd_buffer->state.attachment_cmd_clear = NULL;
 
    cmd_buffer->state.pass = NULL;
    cmd_buffer->state.subpass = NULL;
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index 0580f320979..c90498436e0 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -863,6 +863,11 @@ tu_CreateRenderPass2(VkDevice _device,
          att->clear_mask = 0;
          att->load = false;
       }
+
+      att->cond_load_allowed =
+         (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
+      att->cond_store_allowed =
+         (att->store || att->store_stencil) && !att->clear_mask;
    }
 
    /* From the VK_KHR_multiview spec:
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 2e8110c93b2..0fca4120d8e 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1201,8 +1201,6 @@ struct tu_cmd_state
    VkRect2D render_area;
 
    const struct tu_image_view **attachments;
-   /* Tracks whether attachment was cleared by vkCmdClearAttachments */
-   bool *attachment_cmd_clear;
    /* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
    bool draw_cs_writes_to_cond_pred;
 
@@ -1897,6 +1895,10 @@ struct tu_render_pass_attachment
    /* for D32S8 separate stencil: */
    bool load_stencil;
    bool store_stencil;
+
+   bool cond_load_allowed;
+   bool cond_store_allowed;
+
    int32_t gmem_offset_stencil;
 };
 



More information about the mesa-commit mailing list