Mesa (master): iris: reduce redundant tile cache flushes

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Apr 15 05:05:16 UTC 2021


Module: Mesa
Branch: master
Commit: c85ea824bcab971dc2d9052b5dc937ee4b139cf5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c85ea824bcab971dc2d9052b5dc937ee4b139cf5

Author: Felix DeGrood <felix.j.degrood at intel.com>
Date:   Mon Mar 29 17:11:42 2021 -0700

iris: reduce redundant tile cache flushes

We are flushing the tile cache more often than is necessary. In
unified cache mode, tile cache flushing is expensive, evicting all
depth/pixel data from the L3$. This is only needed for a handful of
cases, such as: making CPU or GPU changes globally visible
(e.g. map), fast color clears, or slow depth clears. Tile cache
flushing is a gen12+ feature.

Remove blanket flushing of tile cache on all depth/RT flushes.
Replace with selective tile cache flushing.

Improves performance in several workloads:
AztecRuins.ogl-high-offscreen-1440p 1%
UnigineValley.ogl-g2                1%
Dota 2 (replay Jul 2020).ogl-g2     1%
Counter-Strike GO.ogl-g2            1%
Manhattan.ogl-Off-19x10             2%
CarChase.ogl-Off-19x10              1%
Bioshock Infinite.ogl-g2            1%

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10217>

---

 src/gallium/drivers/iris/iris_clear.c        |  8 ++++++--
 src/gallium/drivers/iris/iris_context.h      |  1 +
 src/gallium/drivers/iris/iris_fine_fence.c   |  1 +
 src/gallium/drivers/iris/iris_pipe_control.c |  1 +
 src/gallium/drivers/iris/iris_resolve.c      |  1 +
 src/gallium/drivers/iris/iris_resource.c     |  4 +++-
 src/gallium/drivers/iris/iris_state.c        | 20 ++------------------
 7 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c
index 099474e959e..a2901d617bc 100644
--- a/src/gallium/drivers/iris/iris_clear.c
+++ b/src/gallium/drivers/iris/iris_clear.c
@@ -295,7 +295,8 @@ fast_clear_color(struct iris_context *ice,
     */
    iris_emit_end_of_pipe_sync(batch,
                               "fast clear: pre-flush",
-                              PIPE_CONTROL_RENDER_TARGET_FLUSH);
+                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                              PIPE_CONTROL_TILE_CACHE_FLUSH);
 
    iris_batch_sync_region_start(batch);
 
@@ -493,6 +494,8 @@ fast_clear_depth(struct iris_context *ice,
                           ISL_AUX_OP_FULL_RESOLVE, false);
             iris_resource_set_aux_state(ice, res, res_level, layer, 1,
                                         ISL_AUX_STATE_RESOLVED);
+            iris_emit_pipe_control_flush(batch, "hiz op: post depth resolve",
+                                         PIPE_CONTROL_TILE_CACHE_FLUSH);
          }
       }
       const union isl_color_value clear_value = { .f32 = {depth, } };
@@ -607,7 +610,8 @@ clear_depth_stencil(struct iris_context *ice,
    blorp_batch_finish(&blorp_batch);
    iris_batch_sync_region_end(batch);
 
-   iris_flush_and_dirty_for_history(ice, batch, res, 0,
+   iris_flush_and_dirty_for_history(ice, batch, res,
+                                    PIPE_CONTROL_TILE_CACHE_FLUSH,
                                     "cache history: post slow ZS clear");
 
    if (clear_depth && z_res) {
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 865a82a1933..6403ef4948c 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -336,6 +336,7 @@ enum pipe_control_flags
 #define PIPE_CONTROL_CACHE_FLUSH_BITS \
    (PIPE_CONTROL_DEPTH_CACHE_FLUSH |  \
     PIPE_CONTROL_DATA_CACHE_FLUSH |   \
+    PIPE_CONTROL_TILE_CACHE_FLUSH |   \
     PIPE_CONTROL_RENDER_TARGET_FLUSH)
 
 #define PIPE_CONTROL_CACHE_INVALIDATE_BITS  \
diff --git a/src/gallium/drivers/iris/iris_fine_fence.c b/src/gallium/drivers/iris/iris_fine_fence.c
index a114013e8cb..04703895936 100644
--- a/src/gallium/drivers/iris/iris_fine_fence.c
+++ b/src/gallium/drivers/iris/iris_fine_fence.c
@@ -66,6 +66,7 @@ iris_fine_fence_new(struct iris_batch *batch, unsigned flags)
    } else {
       pc = PIPE_CONTROL_WRITE_IMMEDIATE |
            PIPE_CONTROL_RENDER_TARGET_FLUSH |
+           PIPE_CONTROL_TILE_CACHE_FLUSH |
            PIPE_CONTROL_DEPTH_CACHE_FLUSH |
            PIPE_CONTROL_DATA_CACHE_FLUSH;
    }
diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c
index 9f7ac249a49..97689513b72 100644
--- a/src/gallium/drivers/iris/iris_pipe_control.c
+++ b/src/gallium/drivers/iris/iris_pipe_control.c
@@ -292,6 +292,7 @@ iris_flush_all_caches(struct iris_batch *batch)
                                 PIPE_CONTROL_DATA_CACHE_FLUSH |
                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                 PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                PIPE_CONTROL_TILE_CACHE_FLUSH |
                                 PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                 PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c
index 5a14b8048c9..eaae971c79a 100644
--- a/src/gallium/drivers/iris/iris_resolve.c
+++ b/src/gallium/drivers/iris/iris_resolve.c
@@ -366,6 +366,7 @@ iris_cache_flush_for_render(struct iris_batch *batch,
       iris_emit_pipe_control_flush(batch,
                                    "cache tracker: aux usage mismatch",
                                    PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
                                    PIPE_CONTROL_CS_STALL);
       entry->data = v_aux_usage;
    }
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index d309f90a78b..f61c8352677 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -1540,6 +1540,7 @@ iris_map_copy_region(struct iris_transfer *map)
       iris_emit_pipe_control_flush(map->batch,
                                    "transfer read: flush before mapping",
                                    PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
                                    PIPE_CONTROL_CS_STALL);
    }
 
@@ -2051,7 +2052,8 @@ iris_transfer_flush_region(struct pipe_context *ctx,
 
    if (res->base.b.target == PIPE_BUFFER) {
       if (map->staging)
-         history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+         history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                          PIPE_CONTROL_TILE_CACHE_FLUSH;
 
       if (map->dest_had_defined_contents)
          history_flush |= iris_flush_bits_for_history(ice, res);
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 907619933b7..5f882390c4b 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -7616,23 +7616,6 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
       flags |= PIPE_CONTROL_CS_STALL;
    }
 
-   if (GFX_VER >= 12 && ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ||
-                         (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))) {
-      /* From the PIPE_CONTROL instruction table, bit 28 (Tile Cache Flush
-       * Enable):
-       *
-       *    Unified Cache (Tile Cache Disabled):
-       *
-       *    When the Color and Depth (Z) streams are enabled to be cached in
-       *    the DC space of L2, Software must use "Render Target Cache Flush
-       *    Enable" and "Depth Cache Flush Enable" along with "Tile Cache
-       *    Flush" for getting the color and depth (Z) write data to be
-       *    globally observable.  In this mode of operation it is not required
-       *    to set "CS Stall" upon setting "Tile Cache Flush" bit.
-       */
-      flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-   }
-
    if (GFX_VER == 9 && devinfo->gt == 4) {
       /* TODO: The big Skylake GT4 post sync op workaround */
    }
@@ -7737,7 +7720,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
 
    if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) {
       fprintf(stderr,
-              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
+              "  PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
               (flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
               (flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
               (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
@@ -7747,6 +7730,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
               (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
               (flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
               (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
+              (flags & PIPE_CONTROL_TILE_CACHE_FLUSH) ? "Tile " : "",
               (flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
               (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
               (flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",



More information about the mesa-commit mailing list