[Mesa-dev] [PATCH 01/10] i965: Move down genX_upload_sbe in profiles.
Mathias.Froehlich at gmx.net
Mathias.Froehlich at gmx.net
Wed Dec 12 06:44:35 UTC 2018
From: Mathias Fröhlich <mathias.froehlich at web.de>
Avoid looping over all VARYING_SLOT_MAX urb_setup array
entries in genX_upload_sbe. Build an index array of the
active urb_setup entries already in the compile step, so
that each upload only walks the active entries.
v2: Use uint8_t to store the attribute numbers.
Signed-off-by: Mathias Fröhlich <Mathias.Froehlich at web.de>
---
src/intel/compiler/brw_compiler.h | 7 ++++++
src/intel/compiler/brw_fs.cpp | 25 +++++++++++++++++++
src/intel/compiler/brw_fs.h | 2 ++
src/intel/compiler/brw_fs_visitor.cpp | 1 +
src/mesa/drivers/dri/i965/genX_state_upload.c | 7 +++---
5 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index e4f4d83c8e..427a61fd70 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -751,6 +751,13 @@ struct brw_wm_prog_data {
* For varying slots that are not used by the FS, the value is -1.
*/
int urb_setup[VARYING_SLOT_MAX];
+ /**
+ * Index cache for the urb_setup array above: holds the attribute
+ * numbers of the varyings whose urb_setup entry is non-negative.
+ * The number of valid entries is stored in urb_setup_attribs_count.
+ */
+ uint8_t urb_setup_attribs[VARYING_SLOT_MAX];
+ uint8_t urb_setup_attribs_count;
};
/** Returns the SIMD width corresponding to a given KSP index
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3125e5feb1..db76462ba7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1541,6 +1541,27 @@ fs_visitor::assign_curb_setup()
this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
}
+/*
+ * Fill urb_setup_attribs with the attribute numbers, in ascending order,
+ * of every urb_setup entry that is non-negative, and store how many were
+ * found in urb_setup_attribs_count. Call this after urb_setup has been
+ * written, so that later walks only need to visit these entries.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+ /* Attribute numbers and their count are kept in uint8_t variables. */
+ STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+ uint8_t index = 0;
+ for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+ int input_index = wm_prog_data->urb_setup[attr];
+ if (input_index < 0) /* negative entry: slot is not in use, skip it */
+ continue;
+ wm_prog_data->urb_setup_attribs[index++] = attr;
+ }
+ wm_prog_data->urb_setup_attribs_count = index;
+}
+
void
fs_visitor::calculate_urb_setup()
{
@@ -1629,6 +1650,8 @@ fs_visitor::calculate_urb_setup()
}
prog_data->num_varying_inputs = urb_next;
+
+ brw_compute_urb_setup_index(prog_data);
}
void
@@ -6792,6 +6815,8 @@ gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
wm_prog_data->num_varying_inputs = 1;
+
+ brw_compute_urb_setup_index(wm_prog_data);
}
bool
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 163c000882..7d3b271837 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -556,4 +556,6 @@ fs_reg setup_imm_ub(const brw::fs_builder &bld,
enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
nir_intrinsic_op op);
+void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
+
#endif /* BRW_FS_H */
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 51a0ca2374..510f0cac47 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -120,6 +120,7 @@ fs_visitor::emit_dummy_fs()
wm_prog_data->num_varying_inputs = devinfo->gen < 6 ? 1 : 0;
memset(wm_prog_data->urb_setup, -1,
sizeof(wm_prog_data->urb_setup[0]) * VARYING_SLOT_MAX);
+ brw_compute_urb_setup_index(wm_prog_data);
/* We don't have any uniforms. */
stage_prog_data->nr_params = 0;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 8e3fcbf12e..f99dc2f206 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1168,12 +1168,11 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
* BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
*/
bool drawing_points = brw_is_drawing_points(brw);
-
- for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+ for (uint8_t index = 0; index < wm_prog_data->urb_setup_attribs_count; index++) {
+ uint8_t attr = wm_prog_data->urb_setup_attribs[index];
int input_index = wm_prog_data->urb_setup[attr];
- if (input_index < 0)
- continue;
+ assert(0 <= input_index);
/* _NEW_POINT */
bool point_sprite = false;
--
2.19.2
More information about the mesa-dev
mailing list