[Mesa-dev] [PATCH v3 7/7] mesa/st: glsl_to_tgsi: tie in new temporary register merge approach

Gert Wollny gw.fossdev at gmail.com
Sun Jun 18 17:42:59 UTC 2017


This patch ties in the new temporary register lifetime estimation and
rename mapping evaluation. In order to enable it, the environment
variable MESA_GLSL_TO_TGSI_NEW_MERGE must be set.

To compare the performance of the current and the new implementation,
measurements were taken by running the shader-db in one thread; numbers
are in % of total run.

-----------------------------------------------------------
                     old     new(qsort)   new(std::sort)

------------------------ valgrind -------------------------
merge                0.21       0.20          0.13
estimate lifetime    0.03       0.05          0.05
evaluate mapping  (incl=0.16)   0.12          0.06
apply mapping        0.02       0.02          0.02

---   perf (approximate because of statistical sampling) -----
merge                0.24       0.20          0.14
estimate lifetime    0.03       0.05          0.05
evaluate mapping  (incl=0.16)   0.10          0.04
apply mapping        0.05       0.05          0.05
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp         | 33 ++++++++++++++++------
 .../tests/test_glsl_to_tgsi_lifetime.cpp           | 12 ++++++++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ebe87a7821..f475b448c9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,7 +55,7 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 #include "st_shader_cache.h"
-#include "st_glsl_to_tgsi_private.h"
+#include "st_glsl_to_tgsi_temprename.h"
 
 #include "util/hash_table.h"
 #include <algorithm>
@@ -322,6 +322,7 @@ public:
 
    void merge_two_dsts(void);
    void merge_registers(void);
+   void merge_registers_alternative(void);
    void renumber_registers(void);
 
    void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
@@ -567,7 +568,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                if (swz > 1) {
                   dinst->src[j].double_reg2 = true;
                   dinst->src[j].index++;
-	       }
+               }
 
                if (swz & 1)
                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
@@ -2093,7 +2094,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
       st_src_reg temp = get_temp(glsl_type::uvec4_type);
       st_dst_reg temp_dst = st_dst_reg(temp);
       unsigned orig_swz = op[0].swizzle;
-      /* 
+      /*
        * To convert unsigned to 64-bit:
        * zero Y channel, copy X channel.
        */
@@ -2571,8 +2572,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    if (index) {
 
       if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
-	  src.file == PROGRAM_INPUT)
-	 element_size = attrib_type_size(ir->type, true);
+          src.file == PROGRAM_INPUT)
+         element_size = attrib_type_size(ir->type, true);
       if (is_2D) {
          src.index2D = index->value.i[0];
          src.has_index2 = true;
@@ -2854,7 +2855,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
    if (type->is_dual_slot()) {
       l->index++;
       if (r->is_double_vertex_input == false)
-	 r->index++;
+         r->index++;
    }
 }
 
@@ -5137,6 +5138,18 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
    }
 }
 
+void
+glsl_to_tgsi_visitor::merge_registers_alternative(void)
+{
+   struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+   struct lifetime *lt = ralloc_array(mem_ctx, struct lifetime, this->next_temp);
+   estimate_temporary_lifetimes(mem_ctx, &this->instructions, this->next_temp, lt);
+   evaluate_remapping(mem_ctx, this->next_temp, lt, renames);
+   rename_temp_registers(&renames[0]);
+   ralloc_free(lt);
+   ralloc_free(renames);
+}
+
 /* Merges temporary registers together where possible to reduce the number of
  * registers needed to run a program.
  *
@@ -6601,8 +6614,12 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    while (v->eliminate_dead_code());
 
    v->merge_two_dsts();
-   if (!skip_merge_registers)
-      v->merge_registers();
+   if (!skip_merge_registers) {
+      if (getenv("MESA_GLSL_TO_TGSI_NEW_MERGE") != NULL)
+         v->merge_registers_alternative();
+      else
+         v->merge_registers();
+   }
    v->renumber_registers();
 
    /* Write the END instruction. */
diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
index 8fd62d1db3..d63daea80e 100644
--- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -1027,4 +1027,16 @@ TEST_F(RegisterRemapping, RegisterRemapping2)
    run(lt, expect);
 }
 
+TEST_F(RegisterRemapping, RegisterRemappingMergeAll)
+{
+   vector<lifetime> lt({{-1,-1},
+                        {0, 1},
+                        {1, 2},
+                        {2, 3},
+                        {3, 4},
+                       });
+   vector<int> expect({0, 1, 1, 1, 1});
+   run(lt, expect);
+}
+
 
-- 
2.13.0



More information about the mesa-dev mailing list