Mesa (master): iris: Disable VF cache partial address workaround on Gen11+

Tue Nov 26 20:14:02 UTC 2019

Module: Mesa
Branch: master
Commit: 3fdf2bb313b7e91f223fc45ad68adea9d5e76407
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3fdf2bb313b7e91f223fc45ad68adea9d5e76407

Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Mon Nov 25 10:04:38 2019 -0800

iris: Disable VF cache partial address workaround on Gen11+

The vertex cache uses the full 48-bit address on Gen11+.  See the
documentation for 3DSTATE_VERTEX_BUFFERS, which describes the
workaround and lists it as pre-Icelake.

Interestingly, the docs don't mention index buffers as needing a
workaround at all.  So either we've been overzealous, or the docs
never got updated to record that.  Which begs the question of whether
the issue there was fixed, if there was one...

Cuts 40% of the PIPE_CONTROLs from Civilization VI's benchmark; appears
that it improves performance by about 1-2% on Icelake 8x8 (not frequency
locked).

---

 src/gallium/drivers/iris/iris_blorp.c |  2 ++
 src/gallium/drivers/iris/iris_state.c | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c
index a1c0dbbcf4e..c2253ac913a 100644
--- a/src/gallium/drivers/iris/iris_blorp.c
+++ b/src/gallium/drivers/iris/iris_blorp.c
@@ -204,6 +204,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                            const struct blorp_address *addrs,
                                            unsigned num_vbs)
 {
+#if GEN_GEN < 11
    struct iris_context *ice = blorp_batch->blorp->driver_ctx;
    struct iris_batch *batch = blorp_batch->driver_batch;
    bool need_invalidate = false;
@@ -224,6 +225,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                    PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                    PIPE_CONTROL_CS_STALL);
    }
+#endif
 }
 
 static struct blorp_address
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index a30aa61b3ce..51dffb45eba 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5784,6 +5784,15 @@ iris_upload_dirty_render_state(struct iris_context *ice,
       }
 
       if (count) {
+#if GEN_GEN >= 11
+         /* Gen11+ doesn't need the cache workaround below */
+         uint64_t bound = dynamic_bound;
+         while (bound) {
+            const int i = u_bit_scan64(&bound);
+            iris_use_optional_res(batch, genx->vertex_buffers[i].resource,
+                                  false);
+         }
+#else
          /* The VF cache designers cut corners, and made the cache key's
           * <VertexBufferIndex, Memory Address> tuple only consider the bottom
           * 32 bits of the address.  If you have two vertex buffers which get
@@ -5819,6 +5828,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
                                          "workaround: VF cache 32-bit key [VB]",
                                          flush_flags);
          }
+#endif
 
          const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
 
@@ -6034,6 +6044,7 @@ iris_upload_render_state(struct iris_context *ice,
          iris_use_pinned_bo(batch, bo, false);
       }
 
+#if GEN_GEN < 11
       /* The VF cache key only uses 32-bits, see vertex buffer comment above */
       uint16_t high_bits = bo->gtt_offset >> 32ull;
       if (high_bits != ice->state.last_index_bo_high_bits) {
@@ -6043,6 +6054,7 @@ iris_upload_render_state(struct iris_context *ice,
                                       PIPE_CONTROL_CS_STALL);
          ice->state.last_index_bo_high_bits = high_bits;
       }
+#endif
    }
 
 #define _3DPRIM_END_OFFSET          0x2420