[Mesa-dev] [PATCH 07/11] i965: Add uniform to hold the CS thread ID base

Jordan Justen jordan.l.justen at intel.com
Tue May 24 08:37:52 UTC 2016


This thread ID will be used to compute the gl_LocalInvocationIndex and
gl_LocalInvocationID values.

Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_compiler.h       |  1 +
 src/mesa/drivers/dri/i965/brw_cs.c             | 16 +++++++++++++---
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 11 +++++++++++
 src/mesa/drivers/dri/i965/brw_nir.c            | 17 +++++++++++++++++
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp |  2 +-
 5 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index fb0e9ae..22f2316 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -425,6 +425,7 @@ struct brw_cs_prog_data {
    bool uses_barrier;
    bool uses_num_work_groups;
    unsigned local_invocation_id_regs;
+   int thread_local_id_index;
 
    struct {
       /** @{
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 0ab9ebd..74e66bc 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -64,6 +64,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
    struct brw_cs_prog_data prog_data;
    bool start_busy = false;
    double start_time = 0;
+   nir_shader *nir = cp->program.Base.nir;
 
    struct brw_shader *cs =
       (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
@@ -71,6 +72,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
    memset(&prog_data, 0, sizeof(prog_data));
 
+   prog_data.thread_local_id_index = -1;
+   nir_foreach_variable(var, &nir->uniforms) {
+      if (strcmp(var->name, "gl_i965_cs_thread_local_id") == 0) {
+         prog_data.thread_local_id_index = var->data.driver_location / 4;
+         break;
+      }
+   }
+   assert(prog_data.thread_local_id_index >= 0);
+
    if (prog->Comp.SharedSize > 64 * 1024) {
       prog->LinkStatus = false;
       const char *error_str =
@@ -91,7 +101,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
     * prog_data associated with the compiled program, and which will be freed
     * by the state cache.
     */
-   int param_count = cp->program.Base.nir->num_uniforms;
+   int param_count = nir->num_uniforms;
 
    /* The backend also sometimes adds params for texture size. */
    param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -104,7 +114,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
    prog_data.base.nr_params = param_count;
    prog_data.base.nr_image_params = cs->base.NumImages;
 
-   brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base,
+   brw_nir_setup_glsl_uniforms(nir, prog, &cp->program.Base,
                                &prog_data.base, true);
 
    if (unlikely(brw->perf_debug)) {
@@ -122,7 +132,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
    char *error_str;
    program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx,
-                            key, &prog_data, cp->program.Base.nir,
+                            key, &prog_data, nir,
                             st_index, &program_size, &error_str);
    if (program == NULL) {
       prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 847a6d3..b71e21d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2130,6 +2130,12 @@ fs_visitor::assign_constant_locations()
       }
    }
 
+   if (stage == MESA_SHADER_COMPUTE) {
+      brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data;
+      if (!is_live[prog_data->thread_local_id_index])
+         prog_data->thread_local_id_index = -1;
+   }
+
    /* Only allow 16 registers (128 uniform components) as push constants.
     *
     * Just demote the end of the list.  We could probably do better
@@ -2208,6 +2214,11 @@ fs_visitor::assign_constant_locations()
          stage_prog_data->pull_param[pull_constant_loc[i]] = value;
       } else if (push_constant_loc[i] != -1) {
          stage_prog_data->param[push_constant_loc[i]] = value;
+         if (stage == MESA_SHADER_COMPUTE) {
+            brw_cs_prog_data *prog_data = (brw_cs_prog_data*)stage_prog_data;
+            if (prog_data->thread_local_id_index == (int)i)
+               prog_data->thread_local_id_index = push_constant_loc[i];
+         }
       }
    }
    ralloc_free(param);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9274f2e..7ee14d5 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -423,6 +423,20 @@ nir_optimize(nir_shader *nir, bool is_scalar)
    return nir;
 }
 
+static void
+add_cs_local_id_uniform(nir_shader *shader)
+{
+   int location = 0; /* becomes the max data.location of existing uniforms */
+   nir_foreach_variable(var, &shader->uniforms) {
+      location = MAX2(location, var->data.location);
+   }
+   /* Add a uint uniform one past that max; brw_cs.c finds it by name. */
+   nir_variable *var =
+      nir_variable_create(shader, nir_var_uniform, glsl_uint_type(),
+                          "gl_i965_cs_thread_local_id");
+   var->data.location = location + 1;
+}
+
 /* Does some simple lowering and runs the standard suite of optimizations
  *
  * This is intended to be called more-or-less directly after you get the
@@ -440,6 +454,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
 
    const bool is_scalar = compiler->scalar_stage[nir->stage];
 
+   if (nir->stage == MESA_SHADER_COMPUTE)
+      OPT_V(add_cs_local_id_uniform);
+
    if (nir->stage == MESA_SHADER_GEOMETRY)
       OPT(nir_lower_gs_intrinsics);
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index b752ad5..716a390 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -32,7 +32,7 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
                                    bool is_scalar)
 {
    const nir_state_slot *const slots = var->state_slots;
-   assert(var->state_slots != NULL);
+   assert(var->num_state_slots == 0 || var->state_slots != NULL);
 
    unsigned uniform_index = var->data.driver_location / 4;
    for (unsigned int i = 0; i < var->num_state_slots; i++) {
-- 
2.8.1



More information about the mesa-dev mailing list