[Mesa-dev] [RFC PATCH 05/17] eir: implement actual register allocator

Fri May 10 09:09:03 UTC 2019

Some unit tests would be great.

Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
---
 src/etnaviv/compiler/eir.h                   |  10 +-
 src/etnaviv/compiler/eir_register_allocate.c | 176 ++++++++++++++++++-
 2 files changed, 184 insertions(+), 2 deletions(-)

diff --git a/src/etnaviv/compiler/eir.h b/src/etnaviv/compiler/eir.h
index f2652863de6..bbde10cde08 100644
--- a/src/etnaviv/compiler/eir.h
+++ b/src/etnaviv/compiler/eir.h
@@ -29,6 +29,7 @@
 #define H_EIR
 
 #include <assert.h>
+#include "compiler/shader_enums.h"
 #include "util/list.h"
 #include "util/u_dynarray.h"
 #include "util/u_math.h"
@@ -40,6 +41,7 @@ extern "C"{
 
 struct eir;
 struct eir_block;
+struct eir_compiler;
 struct eir_ra_reg_set;
 
 struct eir_register
@@ -149,10 +151,13 @@ struct eir
     * and the used components per register */
    struct util_dynarray reg_alloc;
 
-   /* keep track of number of allocated uniforms */
+   /* keep track of number of allocated uniforms - pre RA */
    struct util_dynarray uniform_alloc;
    unsigned uniform_offset;
 
+   /* total used temp register - post RA */
+   unsigned num_temps;
+
    /* Live ranges of temp registers */
    int *temp_start, *temp_end;
 };
@@ -301,6 +306,9 @@ eir_temp_range_end(const struct eir* ir, unsigned n);
 struct eir_ra_reg_set *
 eir_ra_alloc_reg_set(void *memctx);
 
+bool
+eir_register_allocate(struct eir *ir, gl_shader_stage type, struct eir_compiler *compiler);
+
 uint32_t *
 eir_assemble(const struct eir *ir, struct eir_info *info);
 
diff --git a/src/etnaviv/compiler/eir_register_allocate.c b/src/etnaviv/compiler/eir_register_allocate.c
index 332d686add9..e02fc37a5ef 100644
--- a/src/etnaviv/compiler/eir_register_allocate.c
+++ b/src/etnaviv/compiler/eir_register_allocate.c
@@ -164,7 +164,7 @@ eir_ra_alloc_reg_set(void *memctx)
       }
    }
 
-	unsigned int **q_values = ralloc_array(set, unsigned *, EIR_NUM_REG_CLASSES);
+   unsigned int **q_values = ralloc_array(set, unsigned *, EIR_NUM_REG_CLASSES);
    for (int i = 0; i < EIR_NUM_REG_CLASSES; i++) {
       q_values[i] = rzalloc_array(q_values, unsigned, EIR_NUM_REG_CLASSES);
       for (int j = 0; j < EIR_NUM_REG_CLASSES; j++)
@@ -177,3 +177,177 @@ eir_ra_alloc_reg_set(void *memctx)
 
    return set;
 }
+
+static bool
+interferes(const struct eir *ir, int a, int b)
+{
+   return !((eir_temp_range_end(ir, a) <= eir_temp_range_start(ir, b)) ||
+            (eir_temp_range_end(ir, b) <= eir_temp_range_start(ir, a)));
+}
+
+static inline void
+reswizzle(struct eir_register *reg, int virt_reg)
+{
+   static const unsigned swizzle[EIR_NUM_REG_TYPES] = {
+      INST_SWIZ(0, 1, 2, 3), /* XYZW */
+      INST_SWIZ(0, 1, 2, 2), /* XYZ */
+      INST_SWIZ(0, 1, 3, 3), /* XYW */
+      INST_SWIZ(0, 2, 3, 3), /* XZW */
+      INST_SWIZ(1, 2, 3, 3), /* YZW */
+      INST_SWIZ(0, 1, 1, 1), /* XY */
+      INST_SWIZ(0, 2, 2, 2), /* XZ */
+      INST_SWIZ(0, 3, 3, 3), /* XW */
+      INST_SWIZ(1, 2, 2, 2), /* YZ */
+      INST_SWIZ(1, 3, 3, 3), /* YW */
+      INST_SWIZ(2, 3, 3, 3), /* ZW */
+      INST_SWIZ(0, 0, 0, 0), /* X */
+      INST_SWIZ(1, 1, 1 ,1), /* Y */
+      INST_SWIZ(2, 2, 2, 2), /* Z */
+      INST_SWIZ(3, 3, 3, 3), /* W */
+   };
+
+   const int type = eir_reg_get_type(virt_reg);
+   reg->swizzle = swizzle[type];
+}
+
+static unsigned inline
+result(struct ra_graph *g, unsigned reg)
+{
+   const int virt_reg = ra_get_node_reg(g, reg);
+
+   return etna_reg_get_base(virt_reg);
+}
+
+static void
+assign(struct eir_register *reg, struct ra_graph *g, unsigned *num_temps)
+{
+   if (reg->type != EIR_REG_TEMP)
+      return;
+
+   const int virt_reg = ra_get_node_reg(g, reg->index);
+
+   /*
+    * convert virtual register back to hw register
+    * and change swizzle if needed
+    */
+   reg->index = etna_reg_get_base(virt_reg);
+
+   if (eir_reg_get_type(virt_reg) != EIR_REG_CLASS_VEC4)
+      reswizzle(reg, virt_reg);
+
+   *num_temps = MAX2(*num_temps, reg->index + 1);
+}
+
+bool
+eir_register_allocate(struct eir *ir, gl_shader_stage type, struct eir_compiler *compiler)
+{
+   int num = util_dynarray_num_elements(&ir->reg_alloc, unsigned);
+
+   if (num == 0)
+      return true;
+
+   int frag_varying_pos = -1;
+
+   if (type == MESA_SHADER_FRAGMENT) {
+      for (unsigned i = 0; i < ir->num_inputs; i++) {
+         if (ir->inputs[i].slot == VARYING_SLOT_POS) {
+            frag_varying_pos = i;
+            break;
+         }
+      }
+
+      /* allocate one extra register for hardwired t0 register */
+      if (frag_varying_pos == -1) {
+         num++;
+         frag_varying_pos = num - 1;
+      }
+   }
+
+   struct ra_graph *g = ra_alloc_interference_graph(compiler->set->regs, num);
+   unsigned num_temps = 0;
+
+   assert(g);
+   assert(ir->temp_start);
+   assert(ir->temp_end);
+
+   unsigned i = 0;
+   util_dynarray_foreach(&ir->reg_alloc, unsigned, num_components) {
+      switch (*num_components) {
+      case 1:
+         ra_set_node_class(g, i, compiler->set->class[EIR_REG_CLASS_VIRT_SCALAR]);
+         break;
+      case 2:
+         ra_set_node_class(g, i, compiler->set->class[EIR_REG_CLASS_VIRT_VEC2]);
+         break;
+      case 3:
+         ra_set_node_class(g, i, compiler->set->class[EIR_REG_CLASS_VIRT_VEC3]);
+         break;
+      case 4:
+         ra_set_node_class(g, i, compiler->set->class[EIR_REG_CLASS_VEC4]);
+         break;
+      default:
+         unreachable("not supported num_components");
+         break;
+      }
+
+      i++;
+   }
+
+   /* TODO: handle scalar opcodes */
+#if 0
+   eir_for_each_block(block, ir) {
+      eir_for_each_inst(inst, block) {
+         if (gc_op_is_scalar(inst->gc.opcode)) {
+            ra_set_node_reg(g, )
+         }
+      }
+   }
+#endif
+
+   for (unsigned i = 0; i < num; i++)
+      for (unsigned j = 0; j < i; j++)
+	      if (interferes(ir, i, j))
+	         ra_add_node_interference(g, i, j);
+
+   if (type == MESA_SHADER_FRAGMENT) {
+      for (unsigned i = 0; i < num; i++) {
+         for (int j = 0; j < i; j++) {
+            ra_add_node_interference(g, i, j);
+         }
+      }
+
+      /* hard code frag_color_out_idx to register t0 */
+      ra_set_node_reg(g, frag_varying_pos, 0);
+   }
+
+   bool success = ra_allocate(g);
+   if (!success)
+      goto out;
+
+   eir_for_each_block(block, ir) {
+      eir_for_each_inst(inst, block) {
+         assign(&inst->dst, g, &num_temps);
+         assign(&inst->src[0], g, &num_temps);
+         assign(&inst->src[1], g, &num_temps);
+         assign(&inst->src[2], g, &num_temps);
+      }
+   }
+
+   /* update outputs array */
+   for (unsigned i = 0; i < ir->num_outputs; i++)
+      ir->outputs[i].reg = result(g, i);
+
+   ir->num_temps = num_temps;
+
+   /* free live ranges information as they are not needed anymore */
+   ralloc_free(ir->temp_start);
+   ralloc_free(ir->temp_end);
+
+   ir->temp_start = NULL;
+   ir->temp_end = NULL;
+
+out:
+   ralloc_free(g);
+
+   return success;
+}
-- 
2.21.0