[Mesa-dev] [PATCH 01/10] i965: Move down genX_upload_sbe in profiles.

Mathias.Froehlich at gmx.net Mathias.Froehlich at gmx.net
Wed Dec 12 06:44:35 UTC 2018


From: Mathias Fröhlich <mathias.froehlich at web.de>

Avoid looping over all VARYING_SLOT_MAX entries of the urb_setup
array in genX_upload_sbe. Instead, build an index array of the
active urb_setup entries during the compile step, and on upload
walk only those active entries.

v2: Use uint8_t to store the attribute numbers.

Signed-off-by: Mathias Fröhlich <Mathias.Froehlich at web.de>
---
 src/intel/compiler/brw_compiler.h             |  7 ++++++
 src/intel/compiler/brw_fs.cpp                 | 25 +++++++++++++++++++
 src/intel/compiler/brw_fs.h                   |  2 ++
 src/intel/compiler/brw_fs_visitor.cpp         |  1 +
 src/mesa/drivers/dri/i965/genX_state_upload.c |  7 +++---
 5 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index e4f4d83c8e..427a61fd70 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -751,6 +751,13 @@ struct brw_wm_prog_data {
     * For varying slots that are not used by the FS, the value is -1.
     */
    int urb_setup[VARYING_SLOT_MAX];
+   /**
+    * Index cache for the urb_setup array above: holds the attribute
+    * numbers of the varyings that are active in urb_setup.
+    * The number of valid entries is stored in urb_setup_attribs_count.
+    */
+   uint8_t urb_setup_attribs[VARYING_SLOT_MAX];
+   uint8_t urb_setup_attribs_count;
 };
 
 /** Returns the SIMD width corresponding to a given KSP index
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3125e5feb1..db76462ba7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1541,6 +1541,27 @@ fs_visitor::assign_curb_setup()
    this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 }
 
+/*
+ * Fill urb_setup_attribs with the attribute numbers, in ascending
+ * order, of every urb_setup entry that is non-negative, and store how
+ * many were found in urb_setup_attribs_count.
+ * Lets upload code walk just the active urb_setup entries.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+   /* Attribute numbers and the count are stored as uint8_t; check they fit. */
+   STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+   uint8_t index = 0;
+   for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+      int input_index = wm_prog_data->urb_setup[attr];
+      if (input_index < 0)
+         continue;
+      wm_prog_data->urb_setup_attribs[index++] = attr;
+   }
+   wm_prog_data->urb_setup_attribs_count = index;
+}
+
 void
 fs_visitor::calculate_urb_setup()
 {
@@ -1629,6 +1650,8 @@ fs_visitor::calculate_urb_setup()
    }
 
    prog_data->num_varying_inputs = urb_next;
+
+   brw_compute_urb_setup_index(prog_data);
 }
 
 void
@@ -6792,6 +6815,8 @@ gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
 
    wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
    wm_prog_data->num_varying_inputs = 1;
+
+   brw_compute_urb_setup_index(wm_prog_data);
 }
 
 bool
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 163c000882..7d3b271837 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -556,4 +556,6 @@ fs_reg setup_imm_ub(const brw::fs_builder &bld,
 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
                                                nir_intrinsic_op op);
 
+void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
+
 #endif /* BRW_FS_H */
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 51a0ca2374..510f0cac47 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -120,6 +120,7 @@ fs_visitor::emit_dummy_fs()
    wm_prog_data->num_varying_inputs = devinfo->gen < 6 ? 1 : 0;
    memset(wm_prog_data->urb_setup, -1,
           sizeof(wm_prog_data->urb_setup[0]) * VARYING_SLOT_MAX);
+   brw_compute_urb_setup_index(wm_prog_data);
 
    /* We don't have any uniforms. */
    stage_prog_data->nr_params = 0;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 8e3fcbf12e..f99dc2f206 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1168,12 +1168,11 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
     * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
     */
    bool drawing_points = brw_is_drawing_points(brw);
-
-   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+   for (uint8_t index = 0; index < wm_prog_data->urb_setup_attribs_count; index++) {
+      uint8_t attr = wm_prog_data->urb_setup_attribs[index];
       int input_index = wm_prog_data->urb_setup[attr];
 
-      if (input_index < 0)
-         continue;
+      assert(0 <= input_index);
 
       /* _NEW_POINT */
       bool point_sprite = false;
-- 
2.19.2



More information about the mesa-dev mailing list