Mesa (staging/20.0): intel/blorp: Always emit URB config on Gen7+

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jan 31 17:47:40 UTC 2020


Module: Mesa
Branch: staging/20.0
Commit: deeba167fd2d5964e5fd6385d24999ded0d234f8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=deeba167fd2d5964e5fd6385d24999ded0d234f8

Author: Jason Ekstrand <jason at jlekstrand.net>
Date:   Fri Jan 17 12:09:13 2020 -0600

intel/blorp: Always emit URB config on Gen7+

Previously, i965/iris tried to reuse the currently programmed URB config
if it was good enough for BLORP, rather than reprogramming it each time.
However, reusing it will make some things harder on Gen12+, and we've
not seen any performance impact from emitting the URB config more
frequently in ANV.

This makes the blorp <-> driver interface a bit simpler on Gen7+ because
now all the driver has to do is provide the L3$ config; blorp emits the
URB config itself instead of handing that off to the driver.

Cc: "20.0" mesa-stable at lists.freedesktop.org
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
(cherry picked from commit 09e4c33085f15ffa691053143bec9dbf4aecfeaa)

---

 .pick_status.json                           |  2 +-
 src/gallium/drivers/iris/iris_blorp.c       | 20 ++-----------
 src/intel/blorp/blorp_genX_exec.h           | 45 +++++++++++++++++++++++++++--
 src/intel/vulkan/genX_blorp_exec.c          | 21 +++-----------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 28 ++++++++++++------
 5 files changed, 69 insertions(+), 47 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 07f0f6aa298..a9242d6da19 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -175,7 +175,7 @@
         "description": "intel/blorp: Always emit URB config on Gen7+",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c
index 15b43ee5a6a..e3584e22549 100644
--- a/src/gallium/drivers/iris/iris_blorp.c
+++ b/src/gallium/drivers/iris/iris_blorp.c
@@ -247,24 +247,11 @@ blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
     */
 }
 
-static void
-blorp_emit_urb_config(struct blorp_batch *blorp_batch,
-                      unsigned vs_entry_size,
-                      UNUSED unsigned sf_entry_size)
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *blorp_batch)
 {
-   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
    struct iris_batch *batch = blorp_batch->driver_batch;
-
-   unsigned size[4] = { vs_entry_size, 1, 1, 1 };
-
-   /* If last VS URB size is good enough for what the BLORP operation needed,
-    * then we can skip reconfiguration
-    */
-   if (ice->shaders.last_vs_entry_size >= vs_entry_size)
-      return;
-
-   genX(emit_urb_setup)(ice, batch, size, false, false);
-   ice->state.dirty |= IRIS_DIRTY_URB;
+   return batch->screen->l3_config_3d;
 }
 
 static void
@@ -346,7 +333,6 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
                          IRIS_DIRTY_UNCOMPILED_GS |
                          IRIS_DIRTY_UNCOMPILED_FS |
                          IRIS_DIRTY_VF |
-                         IRIS_DIRTY_URB |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_SAMPLER_STATES_VS |
                          IRIS_DIRTY_SAMPLER_STATES_TCS |
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 84b7cac7e67..d271bb4248c 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -27,6 +27,7 @@
 #include "blorp_priv.h"
 #include "dev/gen_device_info.h"
 #include "common/gen_sample_positions.h"
+#include "common/gen_l3_config.h"
 #include "genxml/gen_macros.h"
 
 /**
@@ -65,10 +66,8 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                            uint32_t *sizes,
                                            unsigned num_vbs);
 
-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch);
-#endif
 
 static void
 blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
@@ -92,9 +91,14 @@ static struct blorp_address
 blorp_get_surface_base_address(struct blorp_batch *batch);
 #endif
 
+#if GEN_GEN >= 7
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch);
+# else
 static void
 blorp_emit_urb_config(struct blorp_batch *batch,
                       unsigned vs_entry_size, unsigned sf_entry_size);
+#endif
 
 static void
 blorp_emit_pipeline(struct blorp_batch *batch,
@@ -207,7 +211,42 @@ emit_urb_config(struct blorp_batch *batch,
    const unsigned sf_entry_size =
       params->sf_prog_data ? params->sf_prog_data->urb_entry_size : 0;
 
+#if GEN_GEN >= 7
+   assert(sf_entry_size == 0);
+   const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
+
+   unsigned entries[4], start[4];
+   gen_get_urb_config(batch->blorp->compiler->devinfo,
+                      blorp_get_l3_config(batch),
+                      false, false, entry_size, entries, start);
+
+#if GEN_GEN == 7 && !GEN_IS_HASWELL
+   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
+    *
+    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
+    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
+    *    needs to be sent before any combination of VS associated 3DSTATE."
+    */
+   blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
+      pc.DepthStallEnable  = true;
+      pc.PostSyncOperation = WriteImmediateData;
+      pc.Address           = blorp_get_workaround_page(batch);
+   }
+#endif
+
+   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
+      blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {
+         urb._3DCommandSubOpcode      += i;
+         urb.VSURBStartingAddress      = start[i];
+         urb.VSURBEntryAllocationSize  = entry_size[i] - 1;
+         urb.VSNumberofURBEntries      = entries[i];
+      }
+   }
+#else /* GEN_GEN < 7 */
    blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
+#endif
 }
 
 #if GEN_GEN >= 7
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
index 7a83eda14e8..cf94e0093d5 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -178,8 +178,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                                        (1 << num_vbs) - 1);
 }
 
-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)
 {
    struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
@@ -188,7 +187,6 @@ blorp_get_workaround_page(struct blorp_batch *batch)
       .buffer = cmd_buffer->device->workaround_bo,
    };
 }
-#endif
 
 static void
 blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
@@ -197,22 +195,11 @@ blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
     */
 }
 
-static void
-blorp_emit_urb_config(struct blorp_batch *batch,
-                      unsigned vs_entry_size, unsigned sf_entry_size)
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch)
 {
-   struct anv_device *device = batch->blorp->driver_ctx;
    struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
-
-   assert(sf_entry_size == 0);
-
-   const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
-
-   genX(emit_urb_setup)(device, &cmd_buffer->batch,
-                        cmd_buffer->state.current_l3_config,
-                        VK_SHADER_STAGE_VERTEX_BIT |
-                        VK_SHADER_STAGE_FRAGMENT_BIT,
-                        entry_size);
+   return cmd_buffer->state.current_l3_config;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 8769602547e..1d7f89e6c32 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -231,8 +231,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
 #endif
 }
 
-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)
 {
    assert(batch->blorp->driver_ctx == batch->driver_batch);
@@ -242,7 +241,6 @@ blorp_get_workaround_page(struct blorp_batch *batch)
       .buffer = brw->workaround_bo,
    };
 }
-#endif
 
 static void
 blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
@@ -253,6 +251,16 @@ blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
     */
 }
 
+#if GEN_GEN >= 7
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   return brw->l3.config;
+}
+#else /* GEN_GEN < 7 */
 static void
 blorp_emit_urb_config(struct blorp_batch *batch,
                       unsigned vs_entry_size,
@@ -261,18 +269,14 @@ blorp_emit_urb_config(struct blorp_batch *batch,
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
 
-#if GEN_GEN >= 7
-   if (brw->urb.vsize >= vs_entry_size)
-      return;
-
-   gen7_upload_urb(brw, vs_entry_size, false, false);
-#elif GEN_GEN == 6
+#if GEN_GEN == 6
    gen6_upload_urb(brw, vs_entry_size, false, 0);
 #else
    /* We calculate it now and emit later. */
    brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size);
 #endif
 }
+#endif
 
 void
 genX(blorp_exec)(struct blorp_batch *batch,
@@ -387,6 +391,12 @@ retry:
    brw->no_depth_or_stencil = !params->depth.enabled &&
                               !params->stencil.enabled;
    brw->ib.index_size = -1;
+   brw->urb.vsize = 0;
+   brw->urb.gs_present = false;
+   brw->urb.gsize = 0;
+   brw->urb.tess_present = false;
+   brw->urb.hsize = 0;
+   brw->urb.dsize = 0;
 
    if (params->dst.enabled) {
       brw_render_cache_add_bo(brw, params->dst.addr.buffer,



More information about the mesa-commit mailing list