Mesa (main): turnip: Skip load/stores for tiles with no geometry
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 29 09:50:53 UTC 2022
Module: Mesa
Branch: main
Commit: 0c489f18cb27d3c725f424f8f57d45636f4eb297
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0c489f18cb27d3c725f424f8f57d45636f4eb297
Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date: Thu Apr 14 17:19:21 2022 +0300
turnip: Skip load/stores for tiles with no geometry
When HW binning is used, tile loads/stores can be skipped
if there is no geometry in the tile.
Loads can be skipped when:
- The attachment won't be resolved; otherwise, if the load were skipped,
there would be holes in the resolved attachment;
- There is no vkCmdClearAttachments afterwards, since such a clear is
likely partial and done via a 2D blit (which doesn't produce geometry).
Stores can be skipped when:
- The attachment was not cleared, either by load_op or by
vkCmdClearAttachments;
- The store is not a resolve.
I chose to predicate each load/store separately to allow them to be
skipped when only some attachments are cleared or resolved.
Gmem loads are moved into a separate cs because whether to emit
CP_COND_REG_EXEC depends on HW binning being enabled and on the usage
of vkCmdClearAttachments.
The CP_COND_REG_EXEC predicate can be changed during draw_cs only
by a perf query; in that case the predicate must be re-emitted.
(At the moment it is always re-emitted before stores.)
Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15974>
---
src/freedreno/vulkan/tu_clear_blit.c | 68 +++++++++++++++++++++-
src/freedreno/vulkan/tu_cmd_buffer.c | 106 ++++++++++++++++++++++++++++++-----
src/freedreno/vulkan/tu_pass.c | 11 ++++
src/freedreno/vulkan/tu_private.h | 10 +++-
src/freedreno/vulkan/tu_query.c | 4 ++
5 files changed, 182 insertions(+), 17 deletions(-)
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index a96be2613e2..555b5edf26f 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -2280,6 +2280,8 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
}
}
+
+ cmd->state.attachment_cmd_clear[a] = true;
}
/* We may not know the multisample count if there are no attachments, so
@@ -2551,6 +2553,8 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
if (a == VK_ATTACHMENT_UNUSED)
continue;
+ cmd->state.attachment_cmd_clear[a] = true;
+
tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
&attachments[j].clearValue);
}
@@ -2799,24 +2803,64 @@ blit_can_resolve(VkFormat format)
return true;
}
+static void
+tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs, bool load)
+{
+ tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
+}
+
+static void
+tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs, bool load)
+{
+ tu_cond_exec_end(cs);
+}
+
void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
+ bool cond_exec_allowed,
bool force_load)
{
const struct tu_image_view *iview = cmd->state.attachments[a];
const struct tu_render_pass_attachment *attachment =
&cmd->state.pass->attachments[a];
+ bool load_common = attachment->load || force_load;
+ bool load_stencil =
+ attachment->load_stencil ||
+ (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load);
+
+ if (!load_common && !load_stencil)
+ return;
+
trace_start_gmem_load(&cmd->trace, cs);
- if (attachment->load || force_load)
+ /* If attachment will be cleared by vkCmdClearAttachments - it is likely
+ * that it would be partially cleared, and since it is done by 2d blit
+ * it doesn't produce geometry, so we have to unconditionally load.
+ *
+ * To simplify conditions treat partially cleared separate DS as fully
+ * cleared and don't emit cond_exec.
+ */
+ bool cond_exec = cond_exec_allowed &&
+ !attachment->clear_mask &&
+ !cmd->state.attachment_cmd_clear[a] &&
+ !attachment->will_be_resolved;
+ if (cond_exec)
+ tu_begin_load_store_cond_exec(cmd, cs, true);
+
+ if (load_common)
tu_emit_blit(cmd, cs, iview, attachment, false, false);
- if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
+ if (load_stencil)
tu_emit_blit(cmd, cs, iview, attachment, false, true);
+ if (cond_exec)
+ tu_end_load_store_cond_exec(cmd, cs, true);
+
trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load);
}
@@ -2919,7 +2963,8 @@ void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
- uint32_t gmem_a)
+ uint32_t gmem_a,
+ bool cond_exec_allowed)
{
struct tu_physical_device *phys_dev = cmd->device->physical_device;
const VkRect2D *render_area = &cmd->state.render_area;
@@ -2930,6 +2975,15 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
if (!dst->store && !dst->store_stencil)
return;
+ bool was_cleared = src->clear_mask || cmd->state.attachment_cmd_clear[a];
+ /* Unconditional store should happen only if attachment was cleared,
+ * which could have happened either by load_op or via vkCmdClearAttachments.
+ */
+ bool cond_exec = cond_exec_allowed && !was_cleared;
+ if (cond_exec) {
+ tu_begin_load_store_cond_exec(cmd, cs, false);
+ }
+
uint32_t x1 = render_area->offset.x;
uint32_t y1 = render_area->offset.y;
uint32_t x2 = x1 + render_area->extent.width;
@@ -2971,6 +3025,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
if (store_separate_stencil)
tu_emit_blit(cmd, cs, iview, src, true, true);
+ if (cond_exec) {
+ tu_end_load_store_cond_exec(cmd, cs, false);
+ }
+
trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
return;
}
@@ -3011,5 +3069,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
}
}
+ if (cond_exec) {
+ tu_end_load_store_cond_exec(cmd, cs, false);
+ }
+
trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned);
}
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index e97765ccdc8..3f1dd3831b6 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -632,6 +632,25 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
return use_sysmem;
}
+/* Optimization: there is no reason to load gmem if there is no
+ * geometry to process. COND_REG_EXEC predicate is set here,
+ * but the actual skip happens in tile_load_cs and tile_store_cs,
+ * for each blit separately.
+ */
+static void
+tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
+ uint32_t pipe, uint32_t slot, bool wfm)
+{
+ if (use_hw_binning(cmd)) {
+ tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
+ tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(pipe)) |
+ A6XX_CP_REG_TEST_0_BIT(slot) |
+ COND(wfm, A6XX_CP_REG_TEST_0_WAIT_FOR_ME));
+ } else {
+ /* COND_REG_EXECs are not emitted in non-binning case */
+ }
+}
+
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -664,6 +683,8 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu_cs_emit(cs, pipe * 4);
tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch);
+ tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, true);
+
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x0);
@@ -740,6 +761,15 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
}
}
+static void
+tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ tu6_emit_blit_scissor(cmd, cs, true);
+
+ for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
+ tu_load_gmem_attachment(cmd, cs, i, use_hw_binning(cmd), false);
+}
+
static void
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
@@ -756,7 +786,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem_offset >= 0)
- tu_store_gmem_attachment(cmd, cs, a, a);
+ tu_store_gmem_attachment(cmd, cs, a, a, use_hw_binning(cmd));
}
if (subpass->resolve_attachments) {
@@ -764,7 +794,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED) {
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
- tu_store_gmem_attachment(cmd, cs, a, gmem_a);
+ tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
}
}
}
@@ -1220,11 +1250,6 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
- tu6_emit_blit_scissor(cmd, cs, true);
-
- for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
- tu_load_gmem_attachment(cmd, cs, i, false);
-
tu6_emit_blit_scissor(cmd, cs, false);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
@@ -1356,8 +1381,10 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
}
static void
-tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
+ uint32_t pipe, uint32_t slot)
{
+ tu_cs_emit_call(cs, &cmd->tile_load_cs);
tu_cs_emit_call(cs, &cmd->draw_cs);
if (use_hw_binning(cmd)) {
@@ -1365,6 +1392,10 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
}
+ /* Predicate is changed in draw_cs so we have to re-emit it */
+ if (cmd->state.draw_cs_writes_to_cond_pred)
+ tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, false);
+
tu_cs_emit_call(cs, &cmd->tile_store_cs);
if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) {
@@ -1418,7 +1449,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
tu6_emit_tile_select(cmd, &cmd->cs, tx, ty, pipe, slot);
trace_start_draw_ib_gmem(&cmd->trace, &cmd->cs);
- tu6_render_tile(cmd, &cmd->cs);
+ tu6_render_tile(cmd, &cmd->cs, pipe, slot);
trace_end_draw_ib_gmem(&cmd->trace, &cmd->cs);
}
}
@@ -1491,6 +1522,7 @@ tu_create_cmd_buffer(struct tu_device *device,
list_inithead(&cmd_buffer->renderpass_autotune_results);
tu_cs_init(&cmd_buffer->cs, device, TU_CS_MODE_GROW, 4096);
+ tu_cs_init(&cmd_buffer->tile_load_cs, device, TU_CS_MODE_GROW, 2048);
tu_cs_init(&cmd_buffer->draw_cs, device, TU_CS_MODE_GROW, 4096);
tu_cs_init(&cmd_buffer->tile_store_cs, device, TU_CS_MODE_GROW, 2048);
tu_cs_init(&cmd_buffer->draw_epilogue_cs, device, TU_CS_MODE_GROW, 4096);
@@ -1507,11 +1539,14 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
list_del(&cmd_buffer->pool_link);
tu_cs_finish(&cmd_buffer->cs);
+ tu_cs_finish(&cmd_buffer->tile_load_cs);
tu_cs_finish(&cmd_buffer->draw_cs);
tu_cs_finish(&cmd_buffer->tile_store_cs);
tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
tu_cs_finish(&cmd_buffer->sub_cs);
+ vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
+
u_trace_fini(&cmd_buffer->trace);
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
@@ -1535,11 +1570,15 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
cmd_buffer->record_result = VK_SUCCESS;
tu_cs_reset(&cmd_buffer->cs);
+ tu_cs_reset(&cmd_buffer->tile_load_cs);
tu_cs_reset(&cmd_buffer->draw_cs);
tu_cs_reset(&cmd_buffer->tile_store_cs);
tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
tu_cs_reset(&cmd_buffer->sub_cs);
+ vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
+ cmd_buffer->state.attachment_cmd_clear = NULL;
+
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
@@ -1678,6 +1717,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
cmd_buffer->usage_flags = pBeginInfo->flags;
tu_cs_begin(&cmd_buffer->cs);
+ tu_cs_begin(&cmd_buffer->tile_load_cs);
tu_cs_begin(&cmd_buffer->draw_cs);
tu_cs_begin(&cmd_buffer->tile_store_cs);
tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
@@ -1710,6 +1750,14 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
cmd_buffer->state.subpass =
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+ /* vkCmdClearAttachments is allowed in a secondary cmdbuf and we have to
+ * track it as in primary cmdbuf.
+ */
+ cmd_buffer->state.attachment_cmd_clear =
+ vk_zalloc(&cmd_buffer->pool->vk.alloc,
+ cmd_buffer->state.pass->attachment_count *
+ sizeof(cmd_buffer->state.attachment_cmd_clear[0]),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
} else {
/* When executing in the middle of another command buffer, the CCU
* state is unknown.
@@ -2245,6 +2293,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
}
tu_cs_end(&cmd_buffer->cs);
+ tu_cs_end(&cmd_buffer->tile_load_cs);
tu_cs_end(&cmd_buffer->draw_cs);
tu_cs_end(&cmd_buffer->tile_store_cs);
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
@@ -3061,7 +3110,7 @@ vk2tu_src_stage(VkPipelineStageFlags vk_stages)
{
enum tu_stage stage = TU_STAGE_CP;
u_foreach_bit (bit, vk_stages) {
- enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, false);
+ enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, false);
stage = MAX2(stage, new_stage);
}
@@ -3073,7 +3122,7 @@ vk2tu_dst_stage(VkPipelineStageFlags vk_stages)
{
enum tu_stage stage = TU_STAGE_PS;
u_foreach_bit (bit, vk_stages) {
- enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, true);
+ enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, true);
stage = MIN2(stage, new_stage);
}
@@ -3130,6 +3179,14 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
cmd->state.has_subpass_predication = true;
if (secondary->state.disable_gmem)
cmd->state.disable_gmem = true;
+
+ cmd->state.draw_cs_writes_to_cond_pred |=
+ secondary->state.draw_cs_writes_to_cond_pred;
+
+ for (uint32_t i = 0; i < cmd->state.pass->attachment_count; i++) {
+ cmd->state.attachment_cmd_clear[i] |=
+ secondary->state.attachment_cmd_clear[i];
+ }
} else {
assert(tu_cs_is_empty(&secondary->draw_cs));
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
@@ -3307,6 +3364,18 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
return;
}
+ cmd->state.attachment_cmd_clear =
+ vk_zalloc(&cmd->pool->vk.alloc, pass->attachment_count *
+ sizeof(cmd->state.attachment_cmd_clear[0]), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (!cmd->state.attachment_cmd_clear) {
+ cmd->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+
+ cmd->state.draw_cs_writes_to_cond_pred = false;
+
for (unsigned i = 0; i < pass->attachment_count; i++) {
cmd->state.attachments[i] = pAttachmentInfo ?
tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) :
@@ -3400,7 +3469,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
- tu_store_gmem_attachment(cmd, cs, a, gmem_a);
+ tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
if (pass->attachments[a].gmem_offset < 0)
continue;
@@ -3410,7 +3479,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
* if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM..
*/
tu_finishme("missing GMEM->GMEM resolve path\n");
- tu_load_gmem_attachment(cmd, cs, a, true);
+ tu_load_gmem_attachment(cmd, cs, a, false, true);
}
}
@@ -4627,8 +4696,15 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
+ /* GMEM loads are created after draw_cs in the separate cs
+ * because they need to know whether to allow their conditional
+ * execution, which is tied to a state that is known only at
+ * the end of the renderpass.
+ */
+ tu6_emit_tile_load(cmd_buffer, &cmd_buffer->tile_load_cs);
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
+ tu_cs_end(&cmd_buffer->tile_load_cs);
tu_cs_end(&cmd_buffer->draw_cs);
tu_cs_end(&cmd_buffer->tile_store_cs);
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
@@ -4649,6 +4725,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
/* discard draw_cs and draw_epilogue_cs entries now that the tiles are
rendered */
+ tu_cs_discard_entries(&cmd_buffer->tile_load_cs);
+ tu_cs_begin(&cmd_buffer->tile_load_cs);
tu_cs_discard_entries(&cmd_buffer->draw_cs);
tu_cs_begin(&cmd_buffer->draw_cs);
tu_cs_discard_entries(&cmd_buffer->tile_store_cs);
@@ -4661,6 +4739,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
tu_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier, true);
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
+ vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
+ cmd_buffer->state.attachment_cmd_clear = NULL;
cmd_buffer->state.pass = NULL;
cmd_buffer->state.subpass = NULL;
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index e7bc2c7da0f..64d9de2676f 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -800,6 +800,12 @@ tu_CreateRenderPass2(VkDevice _device,
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->resolve_attachments[j].attachment =
desc->pResolveAttachments[j].attachment;
+
+ uint32_t src_a = desc->pColorAttachments[j].attachment;
+ if (src_a != VK_ATTACHMENT_UNUSED) {
+ pass->attachments[src_a].will_be_resolved =
+ desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
+ }
}
}
@@ -808,6 +814,11 @@ tu_CreateRenderPass2(VkDevice _device,
subpass->resolve_count++;
uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
+
+ uint32_t src_a = desc->pDepthStencilAttachment->attachment;
+ if (src_a != VK_ATTACHMENT_UNUSED) {
+ pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
+ }
}
uint32_t a = desc->pDepthStencilAttachment ?
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index b35647f9887..52b4fc3bccb 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1196,6 +1196,10 @@ struct tu_cmd_state
VkRect2D render_area;
const struct tu_image_view **attachments;
+ /* Tracks whether attachment was cleared by vkCmdClearAttachments */
+ bool *attachment_cmd_clear;
+ /* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
+ bool draw_cs_writes_to_cond_pred;
bool xfb_used;
bool has_tess;
@@ -1290,6 +1294,7 @@ struct tu_cmd_buffer
VkResult record_result;
struct tu_cs cs;
+ struct tu_cs tile_load_cs;
struct tu_cs draw_cs;
struct tu_cs tile_store_cs;
struct tu_cs draw_epilogue_cs;
@@ -1576,6 +1581,7 @@ void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
+ bool cond_exec_allowed,
bool force_load);
/* expose this function to be able to emit load without checking LOAD_OP */
@@ -1587,7 +1593,8 @@ void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
- uint32_t gmem_a);
+ uint32_t gmem_a,
+ bool cond_exec_allowed);
enum pipe_format tu_vk_format_to_pipe_format(VkFormat vk_format);
@@ -1857,6 +1864,7 @@ struct tu_render_pass_attachment
bool load;
bool store;
int32_t gmem_offset;
+ bool will_be_resolved;
/* for D32S8 separate stencil: */
bool load_stencil;
bool store_stencil;
diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c
index 7bf710f5e30..2b374b73626 100644
--- a/src/freedreno/vulkan/tu_query.c
+++ b/src/freedreno/vulkan/tu_query.c
@@ -874,6 +874,10 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
uint32_t last_pass = ~0;
+ if (cmdbuf->state.pass) {
+ cmdbuf->state.draw_cs_writes_to_cond_pred = true;
+ }
+
/* Querying perf counters happens in these steps:
*
* 0) There's a scratch reg to set a pass index for perf counters query.
More information about the mesa-commit
mailing list