[Mesa-dev] [PATCH 4/7] i965/cs: Reserve local invocation id in payload regs
Jordan Justen
jordan.l.justen at intel.com
Mon Aug 3 23:00:08 PDT 2015
Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
---
src/mesa/drivers/dri/i965/brw_cs.cpp | 29 +++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_cs.h | 5 +++++
src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +++++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
4 files changed, 46 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index b566b92..541151a 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -444,6 +444,35 @@ const struct brw_tracked_state brw_cs_state = {
/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
+unsigned
+brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
+ unsigned dispatch_width)
+{
+ return 3 * dispatch_width * sizeof(uint32_t);
+}
+
+
+/**
* Creates a region containing the push constants for the CS on gen7+.
*
* Push constants are constant values (such as GLSL uniforms) that are
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 8404aa3..5738918 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -42,6 +42,11 @@ void
brw_upload_cs_prog(struct brw_context *brw);
#ifdef __cplusplus
+
+unsigned
+brw_cs_prog_local_id_payload_size(const struct gl_program *prog,
+ unsigned dispatch_width);
+
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15fe364..b72eb76 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -42,6 +42,7 @@
#include "brw_eu.h"
#include "brw_wm.h"
#include "brw_fs.h"
+#include "brw_cs.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
@@ -4624,6 +4625,16 @@ fs_visitor::setup_cs_payload()
assert(devinfo->gen >= 7);
payload.num_regs = 1;
+
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ const unsigned local_id_size =
+ brw_cs_prog_local_id_payload_size(prog, dispatch_width);
+ const unsigned local_id_regs = ALIGN(local_id_size, 32) / 32;
+ if (local_id_regs > 0) {
+ payload.local_invocation_id_reg = payload.num_regs;
+ payload.num_regs += local_id_regs;
+ }
+ }
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4749c47..b2266b2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -368,6 +368,7 @@ public:
uint8_t sample_pos_reg;
uint8_t sample_mask_in_reg;
uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+ uint8_t local_invocation_id_reg;
/** The number of thread payload registers the hardware will supply. */
uint8_t num_regs;
--
2.1.4
More information about the mesa-dev
mailing list