[Mesa-dev] [PATCH 14/14] i965: Generate vs code using scalar backend for BDW+
Kristian Høgsberg
krh at bitplanet.net
Tue Oct 28 15:17:57 PDT 2014
With everything in place, we can now use the scalar backend compiler for
vertex shaders on BDW+. We make scalar vertex shaders the default on
BDW+ but add a new vec4vs debug option to force the vec4 backend.
No piglit regressions.
Performance impact is minimal, I see a ~1.5 improvement on the T-Rex
GLBenchmark case, but in general it's in the noise. Some of our
internal synthetic, vs bounded benchmarks show great improvement, 20%-40%
in some cases, but real-world cases are mostly unaffected.
Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_context.c | 13 +++++++
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_shader.cpp | 17 +++++++--
src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 +++++++++++++++++++++++++-------
src/mesa/drivers/dri/i965/intel_debug.c | 1 +
src/mesa/drivers/dri/i965/intel_debug.h | 1 +
6 files changed, 78 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index e1a994a..f56cfb2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -557,6 +557,15 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
+ if (brw->scalar_vs) {
+ /* If we're using the scalar backend for vertex shaders, we need to
+ * configure these accordingly.
+ */
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+ }
+
/* ARB_viewport_array */
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
@@ -755,6 +764,10 @@ brwCreateContext(gl_api api,
brw_process_driconf_options(brw);
brw_process_intel_debug_variable(brw);
+
+ if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
+ brw->scalar_vs = true;
+
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index e7cd30f..197a8b9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1068,6 +1068,7 @@ struct brw_context
bool has_pln;
bool no_simd8;
bool use_rep_send;
+ bool scalar_vs;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 278e404..a5e42e7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -71,6 +71,19 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
return true;
}
+static inline bool
+is_scalar_shader_stage(struct brw_context *brw, int stage)
+{
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return true;
+ case MESA_SHADER_VERTEX:
+ return brw->scalar_vs;
+ default:
+ return false;
+ }
+}
+
static void
brw_lower_packing_builtins(struct brw_context *brw,
gl_shader_stage shader_type,
@@ -91,7 +104,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
* lowering is needed. For SOA code, the Half2x16 ops must be
* scalarized.
*/
- if (shader_type == MESA_SHADER_FRAGMENT) {
+ if (is_scalar_shader_stage(brw, shader_type)) {
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
}
@@ -179,7 +192,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
do {
progress = false;
- if (stage == MESA_SHADER_FRAGMENT) {
+ if (is_scalar_shader_stage(brw, stage)) {
brw_do_channel_expressions(shader->base.ir);
brw_do_vector_splitting(shader->base.ir);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index df589b8..0fa153d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -22,6 +22,7 @@
*/
#include "brw_vec4.h"
+#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_vs.h"
#include "brw_dead_control_flow.h"
@@ -1863,6 +1864,7 @@ brw_vs_emit(struct brw_context *brw,
{
bool start_busy = false;
double start_time = 0;
+ const unsigned *assembly = NULL;
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
@@ -1877,23 +1879,55 @@ brw_vs_emit(struct brw_context *brw,
if (unlikely(INTEL_DEBUG & DEBUG_VS))
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
- vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
- if (!v.run()) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
+ if (prog && brw->gen >= 8 && brw->scalar_vs) {
+ fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
+ if (!v.run_vs()) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
+ }
- _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
- v.fail_msg);
+ _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
+ v.fail_msg);
- return NULL;
+ return NULL;
+ }
+
+ fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
+ &c->vp->program.Base, v.runtime_check_aads_emit);
+ if (INTEL_DEBUG & DEBUG_VS) {
+ char *name = ralloc_asprintf(mem_ctx, "%s vertex shader %d",
+ prog->Label ? prog->Label : "unnamed",
+ prog->Name);
+ g.enable_debug(name);
+ }
+ assembly = g.generate_assembly(v.cfg, NULL,
+ final_assembly_size);
+ if (brw->gen < 8)
+ assembly = NULL;
+ if (assembly)
+ prog_data->base.simd8 = true;
+ c->base.last_scratch = v.last_scratch;
}
- const unsigned *assembly = NULL;
- vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
- mem_ctx, INTEL_DEBUG & DEBUG_VS);
- assembly = g.generate_assembly(v.cfg, final_assembly_size);
+ if (!assembly) {
+ vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
+ if (!v.run()) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
+ }
+
+ _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
+ v.fail_msg);
+
+ return NULL;
+ }
+
+ vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
+ mem_ctx, INTEL_DEBUG & DEBUG_VS);
+ assembly = g.generate_assembly(v.cfg, final_assembly_size);
+ }
if (unlikely(brw->perf_debug) && shader) {
if (shader->compiled_once) {
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index a283357..6e729b5 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -67,6 +67,7 @@ static const struct dri_debug_control debug_control[] = {
{ "optimizer", DEBUG_OPTIMIZER },
{ "noann", DEBUG_NO_ANNOTATION },
{ "no8", DEBUG_NO8 },
+ { "vec4vs", DEBUG_VEC4VS },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index e859be1..2f20616 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -63,6 +63,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_OPTIMIZER (1 << 27)
#define DEBUG_NO_ANNOTATION (1 << 28)
#define DEBUG_NO8 (1 << 29)
+#define DEBUG_VEC4VS (1 << 30)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
--
2.1.0
More information about the mesa-dev
mailing list