[Mesa-dev] [PATCH v2 20/52] intel/fs: Remove min_dispatch_width from fs_visitor
Jason Ekstrand
jason at jlekstrand.net
Fri Oct 13 05:47:46 UTC 2017
It's 8 for everything except compute shaders. For compute shaders,
there's no need to duplicate the computation and it's just a possible
source of error.
---
src/intel/compiler/brw_fs.cpp | 42 +++++++++++++++++++----------------
src/intel/compiler/brw_fs.h | 5 ++---
src/intel/compiler/brw_fs_visitor.cpp | 11 ---------
3 files changed, 25 insertions(+), 33 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index e629541..676496f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5884,7 +5884,7 @@ fs_visitor::fixup_3src_null_dest()
}
void
-fs_visitor::allocate_registers(bool allow_spilling)
+fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling)
{
bool allocated_without_spills;
@@ -6019,7 +6019,7 @@ fs_visitor::run_vs()
assign_vs_urb_setup();
fixup_3src_null_dest();
- allocate_registers(true);
+ allocate_registers(8, true);
return !failed;
}
@@ -6099,7 +6099,7 @@ fs_visitor::run_tcs_single_patch()
assign_tcs_single_patch_urb_setup();
fixup_3src_null_dest();
- allocate_registers(true);
+ allocate_registers(8, true);
return !failed;
}
@@ -6133,7 +6133,7 @@ fs_visitor::run_tes()
assign_tes_urb_setup();
fixup_3src_null_dest();
- allocate_registers(true);
+ allocate_registers(8, true);
return !failed;
}
@@ -6182,7 +6182,7 @@ fs_visitor::run_gs()
assign_gs_urb_setup();
fixup_3src_null_dest();
- allocate_registers(true);
+ allocate_registers(8, true);
return !failed;
}
@@ -6253,7 +6253,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
assign_urb_setup();
fixup_3src_null_dest();
- allocate_registers(allow_spilling);
+ allocate_registers(8, allow_spilling);
if (failed)
return false;
@@ -6263,9 +6263,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
}
bool
-fs_visitor::run_cs()
+fs_visitor::run_cs(unsigned min_dispatch_width)
{
assert(stage == MESA_SHADER_COMPUTE);
+ assert(dispatch_width >= min_dispatch_width);
setup_cs_payload();
@@ -6296,7 +6297,7 @@ fs_visitor::run_cs()
assign_curb_setup();
fixup_3src_null_dest();
- allocate_registers(true);
+ allocate_registers(min_dispatch_width, true);
if (failed)
return false;
@@ -6786,8 +6787,11 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
shader->info.cs.local_size[2];
- unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
- unsigned simd_required = DIV_ROUND_UP(local_workgroup_size, max_cs_threads);
+ unsigned min_dispatch_width =
+ DIV_ROUND_UP(local_workgroup_size, compiler->devinfo->max_cs_threads);
+ min_dispatch_width = MAX2(8, min_dispatch_width);
+ min_dispatch_width = util_next_power_of_two(min_dispatch_width);
+ assert(min_dispatch_width <= 32);
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
@@ -6797,8 +6801,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
NULL, /* Never used in core profile */
shader, 8, shader_time_index);
- if (simd_required <= 8) {
- if (!v8.run_cs()) {
+ if (min_dispatch_width <= 8) {
+ if (!v8.run_cs(min_dispatch_width)) {
fail_msg = v8.fail_msg;
} else {
cfg = v8.cfg;
@@ -6813,11 +6817,11 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && v8.max_dispatch_width >= 16 &&
- simd_required <= 16) {
+ min_dispatch_width <= 16) {
/* Try a SIMD16 compile */
- if (simd_required <= 8)
+ if (min_dispatch_width <= 8)
v16.import_uniforms(&v8);
- if (!v16.run_cs()) {
+ if (!v16.run_cs(min_dispatch_width)) {
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16.fail_msg);
@@ -6838,14 +6842,14 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
NULL, /* Never used in core profile */
shader, 32, shader_time_index);
if (!fail_msg && v8.max_dispatch_width >= 32 &&
- (simd_required > 16 || (INTEL_DEBUG & DEBUG_DO32))) {
+ (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32))) {
/* Try a SIMD32 compile */
- if (simd_required <= 8)
+ if (min_dispatch_width <= 8)
v32.import_uniforms(&v8);
- else if (simd_required <= 16)
+ else if (min_dispatch_width <= 16)
v32.import_uniforms(&v16);
- if (!v32.run_cs()) {
+ if (!v32.run_cs(min_dispatch_width)) {
compiler->shader_perf_log(log_data,
"SIMD32 shader failed to compile: %s",
v16.fail_msg);
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index b070d38..da32593 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -99,9 +99,9 @@ public:
bool run_tcs_single_patch();
bool run_tes();
bool run_gs();
- bool run_cs();
+ bool run_cs(unsigned min_dispatch_width);
void optimize();
- void allocate_registers(bool allow_spilling);
+ void allocate_registers(unsigned min_dispatch_width, bool allow_spilling);
void setup_fs_payload_gen4();
void setup_fs_payload_gen6();
void setup_vs_payload();
@@ -364,7 +364,6 @@ public:
bool spilled_any_registers;
const unsigned dispatch_width; /**< 8, 16 or 32 */
- unsigned min_dispatch_width;
unsigned max_dispatch_width;
int shader_time_index;
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 32fe0fc..9fd4c20 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -871,17 +871,6 @@ fs_visitor::init()
unreachable("unhandled shader stage");
}
- if (stage == MESA_SHADER_COMPUTE) {
- const struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
- unsigned size = cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] *
- cs_prog_data->local_size[2];
- size = DIV_ROUND_UP(size, devinfo->max_cs_threads);
- min_dispatch_width = size > 16 ? 32 : (size > 8 ? 16 : 8);
- } else {
- min_dispatch_width = 8;
- }
-
this->max_dispatch_width = 32;
this->prog_data = this->stage_prog_data;
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list