[Mesa-dev] [PATCH 05/12] i965/cs: Split out helper for building local id payload
Kristian Høgsberg Kristensen
krh at bitplanet.net
Wed Oct 7 07:11:45 PDT 2015
The initial motivation for this patch was to avoid calling
brw_cs_prog_local_id_payload_dwords() in gen7_cs_state.c from the
compiler. This commit ends up refactoring things a bit more so as to
split out the logic to build the local id payload to brw_fs.cpp. This
moves the payload building closer to the compiler code that uses the
payload layout and makes it avaiable to other users of the compiler.
Signed-off-by: Kristian Høgsberg Kristensen <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_cs.h | 5 +-
src/mesa/drivers/dri/i965/brw_fs.cpp | 68 ++++++++++++++++++++++++--
src/mesa/drivers/dri/i965/gen7_cs_state.c | 80 ++++---------------------------
4 files changed, 76 insertions(+), 78 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0a29a69..1869f28 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -484,6 +484,7 @@ struct brw_cs_prog_data {
unsigned simd_size;
bool uses_barrier;
bool uses_num_work_groups;
+ unsigned local_invocation_id_regs;
struct {
/** @{
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index 0c0ed2b..c07eb6c 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -48,8 +48,9 @@ brw_cs_emit(struct brw_context *brw,
struct gl_shader_program *prog,
unsigned *final_assembly_size);
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+ void *buffer, uint32_t threads, uint32_t stride);
#ifdef __cplusplus
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1dee888..b4125aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4718,20 +4718,43 @@ fs_visitor::setup_vs_payload()
payload.num_regs = 2;
}
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
payload.num_regs = 1;
if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(dispatch_width);
- assert((local_id_dwords & 0x7) == 0);
- const unsigned local_id_regs = local_id_dwords / 8;
+ prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
payload.local_invocation_id_reg = payload.num_regs;
- payload.num_regs += local_id_regs;
+ payload.num_regs += prog_data->local_invocation_id_regs;
}
}
@@ -5171,6 +5194,41 @@ brw_wm_fs_emit(struct brw_context *brw,
return g.get_assembly(final_assembly_size);
}
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* stride should be an integer number of registers, that is, a multiple of
+ * 32 bytes. */
+ assert(stride % 32 == 0);
+
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}
+
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 5edc4fc..6aeb0cb 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw)
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
@@ -191,63 +189,6 @@ const struct brw_tracked_state brw_cs_state = {
/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- * no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- * easily use 16-bit words rather than 32-bit dwords in the push constant
- * data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- * conveying the data, and thereby reduce push constant usage.
- *
- */
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
-{
- return 3 * dispatch_width;
-}
-
-
-static void
-fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
- void *buffer, unsigned *x, unsigned *y, unsigned *z)
-{
- uint32_t *param = (uint32_t *)buffer;
- for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
- param[0 * cs_prog_data->simd_size + i] = *x;
- param[1 * cs_prog_data->simd_size + i] = *y;
- param[2 * cs_prog_data->simd_size + i] = *z;
-
- (*x)++;
- if (*x == cs_prog_data->local_size[0]) {
- *x = 0;
- (*y)++;
- if (*y == cs_prog_data->local_size[1]) {
- *y = 0;
- (*z)++;
- if (*z == cs_prog_data->local_size[2])
- *z = 0;
- }
- }
- }
-}
-
-
-/**
* Creates a region containing the push constants for the CS on gen7+.
*
* Push constants are constant values (such as GLSL uniforms) that are
@@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw,
(struct brw_stage_prog_data*) cs_prog_data;
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
@@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw,
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+ brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
+ reg_aligned_constant_size);
+
/* _NEW_PROGRAM_CONSTANTS */
- unsigned x = 0, y = 0, z = 0;
for (t = 0; t < threads; t++) {
- gl_constant_value *next_param = ¶m[t * param_aligned_count];
- if (local_id_dwords > 0) {
- fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
- next_param += local_id_dwords;
- }
+ gl_constant_value *next_param =
+ ¶m[t * param_aligned_count + local_id_dwords];
for (i = 0; i < prog_data->nr_params; i++) {
next_param[i] = *prog_data->param[i];
}
--
2.4.3
More information about the mesa-dev
mailing list