Mesa (main): anv: Add debug messages for DEBUG_PIPE_CONTROL

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 15 13:47:24 UTC 2021


Module: Mesa
Branch: main
Commit: fc5cb5400801a4476ae9148c6085f06738fa4602
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc5cb5400801a4476ae9148c6085f06738fa4602

Author: Felix DeGrood <felix.j.degrood at intel.com>
Date:   Thu Mar 11 08:40:56 2021 -0800

anv: Add debug messages for DEBUG_PIPE_CONTROL

Enable with INTEL_DEBUG=pc.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9834>

---

 src/intel/vulkan/anv_blorp.c       |  68 ++++++++++-----
 src/intel/vulkan/anv_private.h     |  17 ++++
 src/intel/vulkan/anv_util.c        |  29 +++++++
 src/intel/vulkan/genX_blorp_exec.c |  21 +++--
 src/intel/vulkan/genX_cmd_buffer.c | 174 +++++++++++++++++++++++++++----------
 src/intel/vulkan/genX_query.c      |   9 +-
 6 files changed, 237 insertions(+), 81 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index f985bf50922..2deeaa63953 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -875,7 +875,9 @@ void anv_CmdUpdateBuffer(
    /* We're about to read data that was written from the CPU.  Flush the
     * texture cache so we don't get anything stale.
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
+                             "before UpdateBuffer");
 
    while (dataSize) {
       const uint32_t copy_size = MIN2(dataSize, max_update_size);
@@ -1513,11 +1515,12 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
    /* We don't know who touched the main surface last so flush a bunch of
     * caches to ensure we get good data.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
-      ANV_PIPE_DATA_CACHE_FLUSH_BIT |
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
-      ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+                             ANV_PIPE_DATA_CACHE_FLUSH_BIT |
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
+                             "before copy_to_shadow");
 
    struct blorp_surf surf;
    get_blorp_surf_for_anv_image(cmd_buffer->device,
@@ -1553,8 +1556,9 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
    }
 
    /* We just wrote to the buffer with the render cache.  Flush it. */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
+                             "after copy_to_shadow");
 
    blorp_batch_finish(&batch);
 }
@@ -1632,8 +1636,10 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
     * performance.  If it does this, we need to flush it out of the depth
     * cache before rendering to it.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "before clear DS");
 
    blorp_clear_depth_stencil(&batch, &depth, &stencil,
                              level, base_layer, layer_count,
@@ -1649,8 +1655,10 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
     * performance.  If it does this, we need to flush it out of the render
     * cache before someone starts trying to do stencil on it.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "after clear DS");
 
    struct blorp_surf stencil_shadow;
    if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
@@ -1749,8 +1757,10 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
     * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
     * hangs when doing a clear with WM_HZ_OP.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+                             ANV_PIPE_DEPTH_STALL_BIT,
+                             "before clear hiz");
 
    blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                  level, base_layer, layer_count,
@@ -1780,8 +1790,10 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
     * supposedly unnecessary, we choose to perform the flush unconditionally
     * just to be safe.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+                             ANV_PIPE_DEPTH_STALL_BIT,
+                             "after clear hiz");
 }
 
 void
@@ -1832,8 +1844,10 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
     * resolve and then use a second PIPE_CONTROL after the resolve to ensure
     * that it is completed before any additional drawing occurs.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "before fast clear mcs");
 
    switch (mcs_op) {
    case ISL_AUX_OP_FAST_CLEAR:
@@ -1851,8 +1865,10 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
       unreachable("Unsupported MCS operation");
    }
 
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "after fast clear mcs");
 
    blorp_batch_finish(&batch);
 }
@@ -1913,8 +1929,10 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
     * resolve and then use a second PIPE_CONTROL after the resolve to ensure
     * that it is completed before any additional drawing occurs.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "before fast clear ccs");
 
    switch (ccs_op) {
    case ISL_AUX_OP_FAST_CLEAR:
@@ -1937,8 +1955,10 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
       unreachable("Unsupported CCS operation");
    }
 
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "after fast clear ccs");
 
    blorp_batch_finish(&batch);
 }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 6ec8f31047c..f0ba263e9cf 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -4541,6 +4541,23 @@ anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
 const struct vk_device_dispatch_table *
 anv_get_device_dispatch_table(const struct intel_device_info *devinfo);
 
+void
+anv_dump_pipe_bits(enum anv_pipe_bits bits);
+
+static inline void
+anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
+                          enum anv_pipe_bits bits,
+                          const char* reason)
+{
+   cmd_buffer->state.pending_pipe_bits |= bits;
+   if (unlikely(INTEL_DEBUG & DEBUG_PIPE_CONTROL) && bits)
+   {
+      fputs("pc: add ", stderr);
+      anv_dump_pipe_bits(bits);
+      fprintf(stderr, "reason: %s\n", reason);
+   }
+}
+
 static inline uint32_t
 anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
 {
diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c
index a1b5715396d..0d171264667 100644
--- a/src/intel/vulkan/anv_util.c
+++ b/src/intel/vulkan/anv_util.c
@@ -113,3 +113,32 @@ __vk_errorf(struct anv_instance *instance,
 
    return error;
 }
+
+void
+anv_dump_pipe_bits(enum anv_pipe_bits bits)
+{
+   if (bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT)
+      fputs("+depth_flush ", stderr);
+   if (bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT)
+      fputs("+dc_flush ", stderr);
+   if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+      fputs("+rt_flush ", stderr);
+   if (bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
+      fputs("+tile_flush ", stderr);
+   if (bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT)
+      fputs("+state_inval ", stderr);
+   if (bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT)
+      fputs("+const_inval ", stderr);
+   if (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
+      fputs("+vf_inval ", stderr);
+   if (bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT)
+      fputs("+tex_inval ", stderr);
+   if (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
+      fputs("+ic_inval ", stderr);
+   if (bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT)
+      fputs("+pb_stall ", stderr);
+   if (bits & ANV_PIPE_DEPTH_STALL_BIT)
+      fputs("+depth_stall ", stderr);
+   if (bits & ANV_PIPE_CS_STALL_BIT)
+      fputs("+cs_stall ", stderr);
+}
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 7966b30feb6..9ca1fb66284 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -258,9 +258,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
     *     is set due to new association of BTI, PS Scoreboard Stall bit must
     *     be set in this packet."
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
-      ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
+                             "before blorp BTI change");
 #endif
 
 #if GFX_VERx10 == 120
@@ -285,8 +286,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
     * See genX(cmd_buffer_mi_memcpy) for more details.
     */
    if (params->src.clear_color_addr.buffer ||
-       params->dst.clear_color_addr.buffer)
-      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+       params->dst.clear_color_addr.buffer) {
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_CS_STALL_BIT,
+                                "before blorp prep fast clear");
+   }
 #endif
 
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -311,9 +315,10 @@ genX(blorp_exec)(struct blorp_batch *batch,
     *     is set due to new association of BTI, PS Scoreboard Stall bit must
     *     be set in this packet."
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
-      ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
+                             "after blorp BTI change");
 #endif
 
    cmd_buffer->state.gfx.vb_dirty = ~0;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 04eb12340b0..157fee90951 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -50,6 +50,32 @@
 static void genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t pipeline);
 
+static enum anv_pipe_bits
+convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
+   enum anv_pipe_bits bits = 0;
+   bits |= (pc->DepthCacheFlushEnable) ?  ANV_PIPE_DEPTH_CACHE_FLUSH_BIT : 0;
+   bits |= (pc->DCFlushEnable) ?  ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
+#if GFX_VER >= 12
+   bits |= (pc->TileCacheFlushEnable) ?  ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0;
+#endif
+   bits |= (pc->RenderTargetCacheFlushEnable) ?  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0;
+   bits |= (pc->StateCacheInvalidationEnable) ?  ANV_PIPE_STATE_CACHE_INVALIDATE_BIT : 0;
+   bits |= (pc->ConstantCacheInvalidationEnable) ?  ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT : 0;
+   bits |= (pc->TextureCacheInvalidationEnable) ?  ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT : 0;
+   bits |= (pc->InstructionCacheInvalidateEnable) ?  ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT : 0;
+   bits |= (pc->StallAtPixelScoreboard) ?  ANV_PIPE_STALL_AT_SCOREBOARD_BIT : 0;
+   bits |= (pc->DepthStallEnable) ?  ANV_PIPE_DEPTH_STALL_BIT : 0;
+   bits |= (pc->CommandStreamerStallEnable) ?  ANV_PIPE_CS_STALL_BIT : 0;
+   return bits;
+}
+
+#define anv_debug_dump_pc(pc) \
+   if (unlikely(INTEL_DEBUG & DEBUG_PIPE_CONTROL)) { \
+      fputs("pc: emit PC=( ", stderr); \
+      anv_dump_pipe_bits(convert_pc_to_bits(&(pc))); \
+      fprintf(stderr, ") reason: %s\n", __FUNCTION__); \
+   }
+
 void
 genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -87,6 +113,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
       if (devinfo->revision == 0 /* A0 */)
          pc.HDCPipelineFlushEnable = true;
 #endif
+      anv_debug_dump_pc(pc);
    }
 
 #if GFX_VER == 12
@@ -236,6 +263,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
       pc.TextureCacheInvalidationEnable = true;
       pc.ConstantCacheInvalidationEnable = true;
       pc.StateCacheInvalidationEnable = true;
+      anv_debug_dump_pc(pc);
    }
 }
 
@@ -477,7 +505,9 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
     * with not having this stall in some cases if we were really careful but
     * it's better to play it safe.  Full stall the GPU.
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "before update AUX-TT");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
    struct mi_builder b;
@@ -548,7 +578,9 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
       }
    }
 
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
+                             "after update AUX-TT");
 }
 #endif /* GFX_VER == 12 */
 
@@ -1069,7 +1101,9 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
     * In order to work around this issue, we emit a PIPE_CONTROL with the
     * command streamer stall bit set.
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_CS_STALL_BIT,
+                             "after copy_fast_clear_dwords. Avoid potential hang");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 #endif
 
@@ -1092,8 +1126,9 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
        *
        * In testing, SKL doesn't actually seem to need this, but HSW does.
        */
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
+                                "after copy_fast_clear_dwords surface state update");
    }
 }
 
@@ -1426,8 +1461,10 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
     * resolve and the second likely ensures that the resolve is complete before
     * we do any more rendering or clearing.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "after transition RT");
 
    for (uint32_t l = 0; l < level_count; l++) {
       uint32_t level = base_level + l;
@@ -1472,8 +1509,10 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
       }
    }
 
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "after transition RT");
 }
 
 static VkResult
@@ -1718,14 +1757,19 @@ genX(BeginCommandBuffer)(
     * VF cache occasionally.  It's easier if we can assume we start with a
     * fresh cache (See also genX(cmd_buffer_set_binding_for_gfx8_vb_flush).)
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
+                             "new cmd buffer");
 
    /* Re-emit the aux table register in every command buffer.  This way we're
     * ensured that we have the table even if this command buffer doesn't
     * initialize any images.
     */
-   if (cmd_buffer->device->info.has_aux_map)
-      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
+   if (cmd_buffer->device->info.has_aux_map) {
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
+                                "new cmd buffer with aux-tt");
+   }
 
    /* We send an "Indirect State Pointers Disable" packet at
     * EndCommandBuffer, so all push contant packets are ignored during a
@@ -1842,10 +1886,12 @@ emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
          pc.StallAtPixelScoreboard = true;
          pc.CommandStreamerStallEnable = true;
+         anv_debug_dump_pc(pc);
    }
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
          pc.IndirectStatePointersDisable = true;
          pc.CommandStreamerStallEnable = true;
+         anv_debug_dump_pc(pc);
    }
 }
 
@@ -1954,8 +2000,9 @@ genX(CmdExecuteCommands)(
     * invalidate the whole thing.
     */
    if (GFX_VER >= 8 && GFX_VER <= 9) {
-      primary->state.pending_pipe_bits |=
-         ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+      anv_add_pending_pipe_bits(primary,
+                                ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
+                                "Secondary cmd buffer not tracked in VF cache");
    }
 
    /* The secondary may have selected a different pipeline (3D or compute) and
@@ -2008,6 +2055,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
       pc.DCFlushEnable = true;
       pc.PostSyncOperation = NoWrite;
       pc.CommandStreamerStallEnable = true;
+      anv_debug_dump_pc(pc);
    }
 
    /* ...followed by a second pipelined PIPE_CONTROL that initiates
@@ -2030,6 +2078,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
       pc.InstructionCacheInvalidateEnable = true;
       pc.StateCacheInvalidationEnable = true;
       pc.PostSyncOperation = NoWrite;
+      anv_debug_dump_pc(pc);
    }
 
    /* Now send a third stalling flush to make sure that invalidation is
@@ -2039,6 +2088,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
       pc.DCFlushEnable = true;
       pc.PostSyncOperation = NoWrite;
       pc.CommandStreamerStallEnable = true;
+      anv_debug_dump_pc(pc);
    }
 
    genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg);
@@ -2238,6 +2288,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
              !pipe.DepthStallEnable &&
              !pipe.DCFlushEnable)
             pipe.StallAtPixelScoreboard = true;
+         anv_debug_dump_pc(pipe);
       }
 
       /* If a render target flush was emitted, then we can toggle off the bit
@@ -2326,6 +2377,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
             pipe.PostSyncOperation = WriteImmediateData;
             pipe.Address = cmd_buffer->device->workaround_address;
          }
+         anv_debug_dump_pc(pipe);
       }
 
 #if GFX_VER == 12
@@ -2426,9 +2478,10 @@ void genX(CmdPipelineBarrier)(
       }
    }
 
-   cmd_buffer->state.pending_pipe_bits |=
-      anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
-      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
+                             anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
+                             "pipe barrier");
 }
 
 static void
@@ -3617,8 +3670,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
       }
 
       /* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */
-      if (GFX_VER >= 10)
-         cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+      if (GFX_VER >= 10) {
+         anv_add_pending_pipe_bits(cmd_buffer,
+                                   ANV_PIPE_CS_STALL_BIT,
+                                   "after 3DSTATE_SO_BUFFER call");
+      }
    }
 
    if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
@@ -3655,6 +3711,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          pc.DepthStallEnable  = true;
          pc.PostSyncOperation = WriteImmediateData;
          pc.Address           = cmd_buffer->device->workaround_address;
+         anv_debug_dump_pc(pc);
       }
    }
 #endif
@@ -4388,7 +4445,9 @@ void genX(CmdBeginTransformFeedbackEXT)(
     *    process or otherwise pending at the point that the MI_LOAD/STORE
     *    commands are processed. This will likely require a pipeline flush."
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_CS_STALL_BIT,
+                             "begin transform feedback");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
    for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) {
@@ -4441,7 +4500,9 @@ void genX(CmdEndTransformFeedbackEXT)(
     *    process or otherwise pending at the point that the MI_LOAD/STORE
     *    commands are processed. This will likely require a pipeline flush."
     */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_CS_STALL_BIT,
+                             "end transform feedback");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
    for (uint32_t cb_idx = 0; cb_idx < counterBufferCount; cb_idx++) {
@@ -4497,7 +4558,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
        *    these scoreboard related states, a MEDIA_STATE_FLUSH is
        *    sufficient."
        */
-      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                              ANV_PIPE_CS_STALL_BIT,
+                              "flush compute state");
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);
@@ -4940,6 +5003,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
        */
       pc.DepthStallEnable = true;
 #endif
+      anv_debug_dump_pc(pc);
    }
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
@@ -4951,6 +5015,7 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
 #if GFX_VER >= 12
       pc.TileCacheFlushEnable = true;
 #endif
+      anv_debug_dump_pc(pc);
    }
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) {
@@ -5013,15 +5078,18 @@ genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
     */
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
       pipe.DepthStallEnable = true;
+      anv_debug_dump_pc(pipe);
    }
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
       pipe.DepthCacheFlushEnable = true;
 #if GFX_VER >= 12
       pipe.TileCacheFlushEnable = true;
 #endif
+      anv_debug_dump_pc(pipe);
    }
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
       pipe.DepthStallEnable = true;
+      anv_debug_dump_pc(pipe);
    }
 }
 
@@ -5096,8 +5164,10 @@ genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer
    /* If our range is larger than 32 bits, we have to flush */
    assert(bound->end - bound->start <= (1ull << 32));
    if (dirty->end - dirty->start > (1ull << 32)) {
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_CS_STALL_BIT |
+                                ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
+                                "vb > 32b range");
    }
 }
 
@@ -5212,8 +5282,10 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
 
    if (cmd_buffer->state.current_hash_scale != scale &&
        (width > min_size[idx][0] || height > min_size[idx][1])) {
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_CS_STALL_BIT |
+                                ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
+                                "change pixel hash mode");
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
       anv_batch_write_reg(&cmd_buffer->batch, GENX(GT_MODE), gt) {
@@ -5398,8 +5470,9 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
    cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
 
    /* Accumulate any subpass flushes that need to happen before the subpass */
-   cmd_buffer->state.pending_pipe_bits |=
-      cmd_buffer->state.pass->subpass_flushes[subpass_id];
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             cmd_buffer->state.pass->subpass_flushes[subpass_id],
+                             "begin subpass deps/attachments");
 
    VkRect2D render_area = cmd_buffer->state.render_area;
    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
@@ -5741,9 +5814,10 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
     *     is set due to new association of BTI, PS Scoreboard Stall bit must
     *     be set in this packet."
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
-      ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+                             ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
+                             "change RT");
 #endif
 
 #if GFX_VERx10 == 120
@@ -5754,10 +5828,11 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
     * we want to do a depth flush and stall, so the pipeline is not using these
     * settings while we change the registers.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
-      ANV_PIPE_DEPTH_STALL_BIT |
-      ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+                             ANV_PIPE_DEPTH_STALL_BIT |
+                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+                             "change DS");
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 #endif
 
@@ -5860,9 +5935,10 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
        * result of writes to the MSAA color attachments show up in the sampler
        * when we blit to the single-sampled resolve target.
        */
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
-         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
+                                ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
+                                "MSAA resolve");
 
       for (uint32_t i = 0; i < subpass->color_count; ++i) {
          uint32_t src_att = subpass->color_attachments[i].attachment;
@@ -5919,9 +5995,10 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
        * result of writes to the MSAA depth attachments show up in the sampler
        * when we blit to the single-sampled resolve target.
        */
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
-         ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                              ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
+                              ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,
+                              "MSAA resolve");
 
       uint32_t src_att = subpass->depth_stencil_attachment->attachment;
       uint32_t dst_att = subpass->ds_resolve_attachment->attachment;
@@ -6151,8 +6228,9 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
     * genX_CmdNextSubpass just calls end/begin back-to-back, we just end up
     * ORing the bits in twice so it's harmless.
     */
-   cmd_buffer->state.pending_pipe_bits |=
-      cmd_buffer->state.pass->subpass_flushes[subpass_id + 1];
+   anv_add_pending_pipe_bits(cmd_buffer,
+                             cmd_buffer->state.pass->subpass_flushes[subpass_id + 1],
+                             "end subpass deps/attachments");
 }
 
 void genX(CmdBeginRenderPass2)(
@@ -6337,6 +6415,7 @@ void genX(CmdSetEvent)(
          event->state.offset
       };
       pc.ImmediateData           = VK_EVENT_SET;
+      anv_debug_dump_pc(pc);
    }
 }
 
@@ -6364,6 +6443,7 @@ void genX(CmdResetEvent)(
          event->state.offset
       };
       pc.ImmediateData           = VK_EVENT_RESET;
+      anv_debug_dump_pc(pc);
    }
 }
 
@@ -6436,9 +6516,10 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)(
    case VK_PERFORMANCE_OVERRIDE_TYPE_FLUSH_GPU_CACHES_INTEL:
       if (pOverrideInfo->enable) {
          /* FLUSH ALL THE THINGS! As requested by the MDAPI team. */
-         cmd_buffer->state.pending_pipe_bits |=
-            ANV_PIPE_FLUSH_BITS |
-            ANV_PIPE_INVALIDATE_BITS;
+         anv_add_pending_pipe_bits(cmd_buffer,
+                                   ANV_PIPE_FLUSH_BITS |
+                                   ANV_PIPE_INVALIDATE_BITS,
+                                   "perf counter isolation");
          genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
       }
       break;
@@ -6466,5 +6547,6 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
       pc.CommandStreamerStallEnable = true;
       pc.PostSyncOperation       = WriteTimestamp;
       pc.Address = (struct anv_address) {bo, offset};
+      anv_debug_dump_pc(pc);
    }
 }
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 26db49d51db..ab1d4e9571e 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1374,8 +1374,9 @@ void genX(CmdCopyQueryPoolResults)(
     * command streamer.
     */
    if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) {
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
+                                "CopyQueryPoolResults");
    }
 
    if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
@@ -1393,7 +1394,9 @@ void genX(CmdCopyQueryPoolResults)(
         */
        pool->type == VK_QUERY_TYPE_OCCLUSION ||
        pool->type == VK_QUERY_TYPE_TIMESTAMP) {
-      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+      anv_add_pending_pipe_bits(cmd_buffer,
+                                ANV_PIPE_CS_STALL_BIT,
+                                "CopyQueryPoolResults");
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
    }
 



More information about the mesa-commit mailing list