[Mesa-dev] [PATCH v2 26/52] intel/cs: Re-run final NIR optimizations for each SIMD size
Jason Ekstrand
jason at jlekstrand.net
Fri Oct 13 05:47:52 UTC 2017
With the advent of SPIR-V subgroup operations, compute shaders will have
to be slightly different depending on the SIMD size at which they
execute. In order to allow us to do dispatch-width specific things in
NIR, we re-run the final NIR stages for each sIMD width.
One side-effect of this change is that we start rallocing fs_visitors
which means we need DECLARE_RALLOC_CXX_OPERATORS.
---
src/intel/compiler/brw_fs.cpp | 103 ++++++++++++++++++++++++++----------------
src/intel/compiler/brw_fs.h | 2 +
2 files changed, 66 insertions(+), 39 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5a35a33..a548df7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6768,6 +6768,20 @@ cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
cs_prog_data->threads = (group_size + size - 1) / size;
}
+static nir_shader *
+compile_cs_to_nir(const struct brw_compiler *compiler,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const nir_shader *src_shader,
+ unsigned dispatch_width)
+{
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
+ brw_nir_lower_cs_intrinsics(shader);
+ return brw_postprocess_nir(shader, compiler, true);
+}
+
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
@@ -6778,17 +6792,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
unsigned *final_assembly_size,
char **error_str)
{
- nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
- shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
- brw_nir_lower_cs_intrinsics(shader);
- shader = brw_postprocess_nir(shader, compiler, true);
-
- prog_data->local_size[0] = shader->info.cs.local_size[0];
- prog_data->local_size[1] = shader->info.cs.local_size[1];
- prog_data->local_size[2] = shader->info.cs.local_size[2];
+ prog_data->local_size[0] = src_shader->info.cs.local_size[0];
+ prog_data->local_size[1] = src_shader->info.cs.local_size[1];
+ prog_data->local_size[2] = src_shader->info.cs.local_size[2];
unsigned local_workgroup_size =
- shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
- shader->info.cs.local_size[2];
+ src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] *
+ src_shader->info.cs.local_size[2];
unsigned min_dispatch_width =
DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_threads);
@@ -6796,71 +6805,87 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
min_dispatch_width = util_next_power_of_two(min_dispatch_width);
assert(min_dispatch_width <= 32);
+
+ fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
+ unsigned promoted_constants;
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
- NULL, /* Never used in core profile */
- shader, 8, shader_time_index);
if (min_dispatch_width <= 8) {
- if (!v8.run_cs(min_dispatch_width)) {
- fail_msg = v8.fail_msg;
+ nir_shader *nir8 = compile_cs_to_nir(compiler, mem_ctx, key,
+ prog_data, src_shader, 8);
+ v8 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key,
+ &prog_data->base,
+ NULL, /* Never used in core profile */
+ nir8, 8, shader_time_index);
+ if (!v8->run_cs(min_dispatch_width)) {
+ fail_msg = v8->fail_msg;
} else {
- cfg = v8.cfg;
+ cfg = v8->cfg;
cs_set_simd_size(prog_data, 8);
cs_fill_push_const_info(compiler->devinfo, prog_data);
+ promoted_constants = v8->promoted_constants;
}
}
- fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
- NULL, /* Never used in core profile */
- shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && min_dispatch_width <= 16) {
/* Try a SIMD16 compile */
- if (min_dispatch_width <= 8)
- v16.import_uniforms(&v8);
- if (!v16.run_cs(min_dispatch_width)) {
+ nir_shader *nir16 = compile_cs_to_nir(compiler, mem_ctx, key,
+ prog_data, src_shader, 16);
+ v16 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key,
+ &prog_data->base,
+ NULL, /* Never used in core profile */
+ nir16, 16, shader_time_index);
+ if (v8)
+ v16->import_uniforms(v8);
+
+ if (!v16->run_cs(min_dispatch_width)) {
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
- v16.fail_msg);
+ v16->fail_msg);
if (!cfg) {
fail_msg =
"Couldn't generate SIMD16 program and not "
"enough threads for SIMD8";
}
} else {
- cfg = v16.cfg;
+ cfg = v16->cfg;
cs_set_simd_size(prog_data, 16);
cs_fill_push_const_info(compiler->devinfo, prog_data);
+ promoted_constants = v16->promoted_constants;
}
}
- fs_visitor v32(compiler, log_data, mem_ctx, key, &prog_data->base,
- NULL, /* Never used in core profile */
- shader, 32, shader_time_index);
if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32))) {
/* Try a SIMD32 compile */
- if (min_dispatch_width <= 8)
- v32.import_uniforms(&v8);
- else if (min_dispatch_width <= 16)
- v32.import_uniforms(&v16);
-
- if (!v32.run_cs(min_dispatch_width)) {
+ nir_shader *nir32 = compile_cs_to_nir(compiler, mem_ctx, key,
+ prog_data, src_shader, 32);
+ v32 = new(mem_ctx) fs_visitor(compiler, log_data, mem_ctx, key,
+ &prog_data->base,
+ NULL, /* Never used in core profile */
+ nir32, 32, shader_time_index);
+ if (v8)
+ v32->import_uniforms(v8);
+ else if (v16)
+ v32->import_uniforms(v16);
+
+ if (!v32->run_cs(min_dispatch_width)) {
compiler->shader_perf_log(log_data,
"SIMD32 shader failed to compile: %s",
- v16.fail_msg);
+ v16->fail_msg);
if (!cfg) {
fail_msg =
"Couldn't generate SIMD32 program and not "
"enough threads for SIMD16";
}
} else {
- cfg = v32.cfg;
+ cfg = v32->cfg;
cs_set_simd_size(prog_data, 32);
cs_fill_push_const_info(compiler->devinfo, prog_data);
+ promoted_constants = v32->promoted_constants;
}
}
@@ -6873,12 +6898,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
}
fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
- v8.promoted_constants, false, MESA_SHADER_COMPUTE);
+ promoted_constants, false, MESA_SHADER_COMPUTE);
if (INTEL_DEBUG & DEBUG_CS) {
char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
- shader->info.label ? shader->info.label :
- "unnamed",
- shader->info.name);
+ src_shader->info.label ?
+ src_shader->info.label : "unnamed",
+ src_shader->info.name);
g.enable_debug(name);
}
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index f51a4d8..d3ab385 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -60,6 +60,8 @@ offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta)
class fs_visitor : public backend_shader
{
public:
+ DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
+
fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const void *key,
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list