[Mesa-dev] [PATCH 21/21] intel/cs: Make thread_local_id a regular builtin param
Jason Ekstrand
jason at jlekstrand.net
Fri Sep 29 21:25:21 UTC 2017
This is a lot more natural than special-casing it all over the place.
We still have to do a bit of special-casing in assign_constant_locations,
but it's not special-cased quite as badly as it was before.
---
src/intel/compiler/brw_compiler.h | 3 +-
src/intel/compiler/brw_fs.cpp | 42 ++++++++++++-------------
src/intel/compiler/brw_nir_intrinsics.c | 12 ++++---
src/intel/vulkan/anv_cmd_buffer.c | 10 +++---
src/mesa/drivers/dri/i965/gen6_constant_state.c | 10 +++---
5 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index f9679a3..768b932 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -543,6 +543,8 @@ enum brw_param_builtin {
BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W,
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
+
+ BRW_PARAM_BUILTIN_THREAD_LOCAL_ID,
};
#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \
@@ -739,7 +741,6 @@ struct brw_cs_prog_data {
unsigned threads;
bool uses_barrier;
bool uses_num_work_groups;
- int thread_local_id_index;
struct {
struct brw_push_const_block cross_thread;
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b06ae9f..05b97f0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1936,6 +1936,20 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
}
}
+static int
+get_thread_local_id_param_index(const brw_stage_prog_data *prog_data)
+{
+ if (prog_data->nr_params == 0)
+ return -1;
+
+ /* The local thread id is always the last parameter in the list */
+ uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
+ if (last_param == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID)
+ return prog_data->nr_params - 1;
+
+ return -1;
+}
+
/**
* Assign UNIFORM file registers to either push constants or pull constants.
*
@@ -1964,10 +1978,6 @@ fs_visitor::assign_constant_locations()
bool contiguous[uniforms];
memset(contiguous, 0, sizeof(contiguous));
- int thread_local_id_index =
- (stage == MESA_SHADER_COMPUTE) ?
- brw_cs_prog_data(stage_prog_data)->thread_local_id_index : -1;
-
/* First, we walk through the instructions and do two things:
*
* 1) Figure out which uniforms are live.
@@ -2010,8 +2020,7 @@ fs_visitor::assign_constant_locations()
}
}
- if (thread_local_id_index >= 0 && !is_live[thread_local_id_index])
- thread_local_id_index = -1;
+ int thread_local_id_index = get_thread_local_id_param_index(stage_prog_data);
/* Only allow 16 registers (128 uniform components) as push constants.
*
@@ -2119,22 +2128,15 @@ fs_visitor::assign_constant_locations()
* push_constant_loc[i] <= i and we can do it in one smooth loop without
* having to make a copy.
*/
- int new_thread_local_id_index = -1;
for (unsigned int i = 0; i < uniforms; i++) {
uint32_t value = param[i];
if (pull_constant_loc[i] != -1) {
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
} else if (push_constant_loc[i] != -1) {
stage_prog_data->param[push_constant_loc[i]] = value;
- if (thread_local_id_index == (int)i)
- new_thread_local_id_index = push_constant_loc[i];
}
}
ralloc_free(param);
-
- if (stage == MESA_SHADER_COMPUTE)
- brw_cs_prog_data(stage_prog_data)->thread_local_id_index =
- new_thread_local_id_index;
}
bool
@@ -6693,24 +6695,20 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo,
struct brw_cs_prog_data *cs_prog_data)
{
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
- bool fill_thread_id =
- cs_prog_data->thread_local_id_index >= 0 &&
- cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
+ int thread_local_id_index = get_thread_local_id_param_index(prog_data);
bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
/* The thread ID should be stored in the last param dword */
- assert(prog_data->nr_params > 0 || !fill_thread_id);
- assert(!fill_thread_id ||
- cs_prog_data->thread_local_id_index ==
- (int)prog_data->nr_params - 1);
+ assert(thread_local_id_index == -1 ||
+ thread_local_id_index == prog_data->nr_params - 1);
unsigned cross_thread_dwords, per_thread_dwords;
if (!cross_thread_supported) {
cross_thread_dwords = 0u;
per_thread_dwords = prog_data->nr_params;
- } else if (fill_thread_id) {
+ } else if (thread_local_id_index >= 0) {
/* Fill all but the last register with cross-thread payload */
- cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
+ cross_thread_dwords = 8 * (thread_local_id_index / 8);
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
} else {
diff --git a/src/intel/compiler/brw_nir_intrinsics.c b/src/intel/compiler/brw_nir_intrinsics.c
index 83c6c0e..161e972 100644
--- a/src/intel/compiler/brw_nir_intrinsics.c
+++ b/src/intel/compiler/brw_nir_intrinsics.c
@@ -33,6 +33,7 @@ struct lower_intrinsics_state {
nir_function_impl *impl;
bool progress;
nir_builder builder;
+ int thread_local_id_index;
};
static nir_ssa_def *
@@ -50,12 +51,13 @@ read_thread_local_id(struct lower_intrinsics_state *state)
if (group_size <= 8)
return nir_imm_int(b, 0);
- if (state->cs_prog_data->thread_local_id_index == -1) {
- state->cs_prog_data->thread_local_id_index = prog_data->nr_params;
- brw_stage_prog_data_add_params(prog_data, 1);
+ if (state->thread_local_id_index == -1) {
+ state->thread_local_id_index = prog_data->nr_params;
+ uint32_t *param = brw_stage_prog_data_add_params(prog_data, 1);
+ *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
nir->num_uniforms += 4;
}
- unsigned id_index = state->cs_prog_data->thread_local_id_index;
+ unsigned id_index = state->thread_local_id_index;
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
@@ -171,7 +173,7 @@ brw_nir_lower_intrinsics(nir_shader *nir, struct brw_stage_prog_data *prog_data)
state.nir = nir;
state.prog_data = prog_data;
- state.cs_prog_data->thread_local_id_index = -1;
+ state.thread_local_id_index = -1;
do {
state.progress = false;
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 5fa7bdb2..28b3f9f 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -706,12 +706,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
uint32_t *u32_map = state.map;
if (cs_prog_data->push.cross_thread.size > 0) {
- assert(cs_prog_data->thread_local_id_index < 0 ||
- cs_prog_data->thread_local_id_index >=
- cs_prog_data->push.cross_thread.dwords);
for (unsigned i = 0;
i < cs_prog_data->push.cross_thread.dwords;
i++) {
+ assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
u32_map[i] = anv_push_constant_value(data, prog_data->param[i]);
}
}
@@ -723,11 +721,11 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
cs_prog_data->push.cross_thread.regs);
unsigned src = cs_prog_data->push.cross_thread.dwords;
for ( ; src < prog_data->nr_params; src++, dst++) {
- if (src != cs_prog_data->thread_local_id_index) {
+ if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
+ u32_map[dst] = t * cs_prog_data->simd_size;
+ } else {
u32_map[dst] =
anv_push_constant_value(data, prog_data->param[src]);
- } else {
- u32_map[dst] = t * cs_prog_data->simd_size;
}
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index eb9e291..62ad6b0 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -305,12 +305,10 @@ brw_upload_cs_push_constants(struct brw_context *brw,
if (cs_prog_data->push.cross_thread.size > 0) {
uint32_t *param_copy = param;
- assert(cs_prog_data->thread_local_id_index < 0 ||
- cs_prog_data->thread_local_id_index >=
- cs_prog_data->push.cross_thread.dwords);
for (unsigned i = 0;
i < cs_prog_data->push.cross_thread.dwords;
i++) {
+ assert(prog_data->param[i] != BRW_PARAM_BUILTIN_THREAD_LOCAL_ID);
param_copy[i] = brw_param_value(brw, prog, stage_state,
prog_data->param[i]);
}
@@ -323,11 +321,11 @@ brw_upload_cs_push_constants(struct brw_context *brw,
cs_prog_data->push.cross_thread.regs);
unsigned src = cs_prog_data->push.cross_thread.dwords;
for ( ; src < prog_data->nr_params; src++, dst++) {
- if (src != cs_prog_data->thread_local_id_index) {
+ if (prog_data->param[src] == BRW_PARAM_BUILTIN_THREAD_LOCAL_ID) {
+ param[dst] = t * cs_prog_data->simd_size;
+ } else {
param[dst] = brw_param_value(brw, prog, stage_state,
prog_data->param[src]);
- } else {
- param[dst] = t * cs_prog_data->simd_size;
}
}
}
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list