[Mesa-dev] [PATCH v3 12/12] i965: Generate vs code using scalar backend for BDW+

Kristian Høgsberg krh at bitplanet.net
Thu Dec 4 22:02:33 PST 2014


With everything in place, we can now use the scalar backend compiler for
vertex shaders on BDW+.  We make scalar vertex shaders the default on
BDW+ but add a new vec4vs debug option to force the vec4 backend.

No piglit regressions.

Performance impact is minimal; I see a ~1.5% improvement on the T-Rex
GLBenchmark case, but in general it's in the noise.  Some of our
internal synthetic, VS-bound benchmarks show great improvement, 20%-40%
in some cases, but real-world cases are mostly unaffected.

Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_context.c  | 13 ++++++++
 src/mesa/drivers/dri/i965/brw_context.h  |  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp | 19 +++++++++--
 src/mesa/drivers/dri/i965/brw_vec4.cpp   | 57 +++++++++++++++++++++++++-------
 src/mesa/drivers/dri/i965/intel_debug.c  |  1 +
 src/mesa/drivers/dri/i965/intel_debug.h  |  1 +
 6 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 5830b6e..5bf1859 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -567,6 +567,15 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
    ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
 
+   if (brw->scalar_vs) {
+      /* If we're using the scalar backend for vertex shaders, we need to
+       * configure these accordingly.
+       */
+      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+   }
+
    /* ARB_viewport_array */
    if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
       ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
@@ -762,6 +771,10 @@ brwCreateContext(gl_api api,
 
    brw_process_driconf_options(brw);
    brw_process_intel_debug_variable(brw);
+
+   if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
+      brw->scalar_vs = true;
+
    brw_initialize_context_constants(brw);
 
    ctx->Const.ResetStrategy = notify_reset
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c3b9013..1b8f0bb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1076,6 +1076,7 @@ struct brw_context
    bool has_pln;
    bool no_simd8;
    bool use_rep_send;
+   bool scalar_vs;
 
    /**
     * Some versions of Gen hardware don't do centroid interpolation correctly
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1e6ecb5..da87539 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -74,6 +74,19 @@ brw_shader_precompile(struct gl_context *ctx,
    return true;
 }
 
+static inline bool
+is_scalar_shader_stage(struct brw_context *brw, int stage)
+{
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT:
+      return true;
+   case MESA_SHADER_VERTEX:
+      return brw->scalar_vs;
+   default:
+      return false;
+   }
+}
+
 static void
 brw_lower_packing_builtins(struct brw_context *brw,
                            gl_shader_stage shader_type,
@@ -84,7 +97,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
            | LOWER_PACK_UNORM_2x16
            | LOWER_UNPACK_UNORM_2x16;
 
-   if (shader_type == MESA_SHADER_FRAGMENT) {
+   if (is_scalar_shader_stage(brw, shader_type)) {
       ops |= LOWER_UNPACK_UNORM_4x8
            | LOWER_UNPACK_SNORM_4x8
            | LOWER_PACK_UNORM_4x8
@@ -97,7 +110,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
        * lowering is needed. For SOA code, the Half2x16 ops must be
        * scalarized.
        */
-      if (shader_type == MESA_SHADER_FRAGMENT) {
+      if (is_scalar_shader_stage(brw, shader_type)) {
          ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
              |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
       }
@@ -185,7 +198,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       do {
 	 progress = false;
 
-	 if (stage == MESA_SHADER_FRAGMENT) {
+	 if (is_scalar_shader_stage(brw, stage)) {
 	    brw_do_channel_expressions(shader->base.ir);
 	    brw_do_vector_splitting(shader->base.ir);
 	 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 71b396a..2015e4f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -22,6 +22,7 @@
  */
 
 #include "brw_vec4.h"
+#include "brw_fs.h"
 #include "brw_cfg.h"
 #include "brw_vs.h"
 #include "brw_dead_control_flow.h"
@@ -1760,6 +1761,7 @@ brw_vs_emit(struct brw_context *brw,
 {
    bool start_busy = false;
    double start_time = 0;
+   const unsigned *assembly = NULL;
 
    if (unlikely(brw->perf_debug)) {
       start_busy = (brw->batch.last_bo &&
@@ -1774,23 +1776,54 @@ brw_vs_emit(struct brw_context *brw,
    if (unlikely(INTEL_DEBUG & DEBUG_VS))
       brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
 
-   vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
-   if (!v.run()) {
-      if (prog) {
-         prog->LinkStatus = false;
-         ralloc_strcat(&prog->InfoLog, v.fail_msg);
+   if (prog && brw->gen >= 8 && brw->scalar_vs) {
+      fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
+      if (!v.run_vs()) {
+         if (prog) {
+            prog->LinkStatus = false;
+            ralloc_strcat(&prog->InfoLog, v.fail_msg);
+         }
+
+         _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
+                       v.fail_msg);
+
+         return NULL;
       }
 
-      _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
-                    v.fail_msg);
+      fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
+                     &c->vp->program.Base, v.runtime_check_aads_emit);
+      if (INTEL_DEBUG & DEBUG_VS) {
+         char *name = ralloc_asprintf(mem_ctx, "%s vertex shader %d",
+                                      prog->Label ? prog->Label : "unnamed",
+                                      prog->Name);
+         g.enable_debug(name);
+      }
+      g.generate_code(v.cfg, 8);
+      assembly = g.get_assembly(final_assembly_size);
 
-      return NULL;
+      if (assembly)
+         prog_data->base.simd8 = true;
+      c->base.last_scratch = v.last_scratch;
    }
 
-   const unsigned *assembly = NULL;
-   vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
-                    mem_ctx, INTEL_DEBUG & DEBUG_VS);
-   assembly = g.generate_assembly(v.cfg, final_assembly_size);
+   if (!assembly) {
+      vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
+      if (!v.run()) {
+         if (prog) {
+            prog->LinkStatus = false;
+            ralloc_strcat(&prog->InfoLog, v.fail_msg);
+         }
+
+         _mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
+                       v.fail_msg);
+
+         return NULL;
+      }
+
+      vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
+                       mem_ctx, INTEL_DEBUG & DEBUG_VS);
+      assembly = g.generate_assembly(v.cfg, final_assembly_size);
+   }
 
    if (unlikely(brw->perf_debug) && shader) {
       if (shader->compiled_once) {
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index 6391cf7..59b3951 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -68,6 +68,7 @@ static const struct dri_debug_control debug_control[] = {
    { "optimizer",   DEBUG_OPTIMIZER },
    { "noann",       DEBUG_NO_ANNOTATION },
    { "no8",         DEBUG_NO8 },
+   { "vec4vs",      DEBUG_VEC4VS },
    { NULL,    0 }
 };
 
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index e859be1..2f20616 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -63,6 +63,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_OPTIMIZER           (1 << 27)
 #define DEBUG_NO_ANNOTATION       (1 << 28)
 #define DEBUG_NO8                 (1 << 29)
+#define DEBUG_VEC4VS              (1 << 30)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.2.0



More information about the mesa-dev mailing list