[Mesa-dev] [RFC 7/7] i965/fs: Enable all SIMD32 heuristics
Toni Lönnberg
toni.lonnberg at intel.com
Mon Oct 15 13:19:58 UTC 2018
There are three simple heuristics for SIMD32 shader enabling:
- How many MRTs does the shader write into?
- How many grouped texture fetches does the shader have?
- How many instructions does the SIMD32 shader have compared to the SIMD16
shader?
For testing purposes, the heuristics can be controlled via these environment
variables:
simd32_heuristic_mrt_check
- Enables MRT write check
- Default: true
simd32_heuristic_max_mrts
- How many MRT writes the heuristic allows
- Default: 1
simd32_heuristic_grouped_check
- Enables grouped texture fetch check
- Default: true
simd32_heuristic_grouped_sends
- How many grouped texture fetches the heuristic allows
- Default: 6
simd32_heuristic_inst_check
- Enables SIMD32 vs. SIMD16 instruction count check
- Default: true
simd32_heuristic_inst_ratio
- SIMD32 vs. SIMD16 instruction count ratio the heuristic allows
- Default: 2.3
In addition, SIMD32 shaders are not compiled when the SIMD16 compilation fails
or spills.
---
src/intel/compiler/brw_fs.cpp | 37 +++++++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 02e151f..5cceb6c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7120,6 +7120,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
+ bool simd16_failed = false;
+ bool simd16_spilled = false;
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
@@ -7187,10 +7189,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
shader_time_index16);
v16.import_uniforms(&v8);
if (!v16.run_fs(allow_spilling, use_rep_send)) {
+ simd16_failed = true;
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16.fail_msg);
} else {
+ simd16_spilled = v16.spilled_any_registers;
simd16_cfg = v16.cfg;
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
@@ -7198,9 +7202,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
}
/* Currently, the compiler only supports SIMD32 on SNB+ */
+ const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control;
+ uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;
+
if (v8.max_dispatch_width >= 32 && !use_rep_send &&
compiler->devinfo->gen >= 6 &&
- unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+ (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+ (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+ !simd16_failed && !simd16_spilled &&
+ (!ctrl->mrt_check ||
+ (ctrl->mrt_check &&
+ u_count_bits64(&mrts) <= ctrl->max_mrts))))) {
/* Try a SIMD32 compile */
fs_visitor v32(compiler, log_data, mem_ctx, key,
&prog_data->base, prog, shader, 32,
@@ -7211,9 +7223,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
"SIMD32 shader failed to compile: %s",
v32.fail_msg);
} else {
- simd32_cfg = v32.cfg;
- prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
- prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
+ v32.run_heuristic(ctrl)) {
+ simd32_cfg = v32.cfg;
+ prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
+ prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+ }
}
}
@@ -7292,8 +7307,18 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
}
if (simd32_cfg) {
- prog_data->dispatch_32 = true;
- prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
+ uint32_t offset = g.generate_code(simd32_cfg, 32);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+ (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+ (!simd16_cfg ||
+ (simd16_cfg &&
+ (!ctrl->inst_count_check ||
+ (ctrl->inst_count_check &&
+ (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) {
+ prog_data->dispatch_32 = true;
+ prog_data->prog_offset_32 = offset;
+ }
}
return g.get_assembly();
--
2.7.4
More information about the mesa-dev
mailing list