Mesa (master): etnaviv: move ra into own file

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jul 2 17:13:32 UTC 2020


Module: Mesa
Branch: master
Commit: 64cdc1311b9385eb3611061dcbfe5ad8a183a896
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=64cdc1311b9385eb3611061dcbfe5ad8a183a896

Author: Christian Gmeiner <christian.gmeiner at gmail.com>
Date:   Tue Jun 30 11:37:54 2020 +0200

etnaviv: move ra into own file

Signed-off-by: Christian Gmeiner <christian.gmeiner at gmail.com>
Acked-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5690>

---

 src/gallium/drivers/etnaviv/Makefile.sources       |   1 +
 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c | 308 +--------------------
 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h |  91 +++++-
 .../drivers/etnaviv/etnaviv_compiler_nir_ra.c      | 248 +++++++++++++++++
 src/gallium/drivers/etnaviv/meson.build            |   1 +
 5 files changed, 342 insertions(+), 307 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/Makefile.sources b/src/gallium/drivers/etnaviv/Makefile.sources
index 5835dea69bb..f765088117d 100644
--- a/src/gallium/drivers/etnaviv/Makefile.sources
+++ b/src/gallium/drivers/etnaviv/Makefile.sources
@@ -20,6 +20,7 @@ C_SOURCES :=  \
 	etnaviv_compiler_nir.c \
 	etnaviv_compiler_nir_emit.c \
 	etnaviv_compiler_nir_liveness.c \
+	etnaviv_compiler_nir_ra.c \
 	etnaviv_compiler_tgsi.c \
 	etnaviv_context.c \
 	etnaviv_context.h \
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index eafb82680f8..63b5152a4a6 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -277,77 +277,6 @@ const_src(struct etna_compile *c, nir_const_value *value, unsigned num_component
    return SRC_CONST(i - 1, swiz);
 }
 
-/* Swizzles and write masks can be used to layer virtual non-interfering
- * registers on top of the real VEC4 registers. For example, the virtual
- * VEC3_XYZ register and the virtual SCALAR_W register that use the same
- * physical VEC4 base register do not interfere.
- */
-enum reg_class {
-   REG_CLASS_VIRT_SCALAR,
-   REG_CLASS_VIRT_VEC2,
-   REG_CLASS_VIRT_VEC3,
-   REG_CLASS_VEC4,
-   /* special vec2 class for fast transcendentals, limited to XY or ZW */
-   REG_CLASS_VIRT_VEC2T,
-   /* special classes for LOAD - contiguous components */
-   REG_CLASS_VIRT_VEC2C,
-   REG_CLASS_VIRT_VEC3C,
-   NUM_REG_CLASSES,
-};
-
-enum reg_type {
-   REG_TYPE_VEC4,
-   REG_TYPE_VIRT_VEC3_XYZ,
-   REG_TYPE_VIRT_VEC3_XYW,
-   REG_TYPE_VIRT_VEC3_XZW,
-   REG_TYPE_VIRT_VEC3_YZW,
-   REG_TYPE_VIRT_VEC2_XY,
-   REG_TYPE_VIRT_VEC2_XZ,
-   REG_TYPE_VIRT_VEC2_XW,
-   REG_TYPE_VIRT_VEC2_YZ,
-   REG_TYPE_VIRT_VEC2_YW,
-   REG_TYPE_VIRT_VEC2_ZW,
-   REG_TYPE_VIRT_SCALAR_X,
-   REG_TYPE_VIRT_SCALAR_Y,
-   REG_TYPE_VIRT_SCALAR_Z,
-   REG_TYPE_VIRT_SCALAR_W,
-   REG_TYPE_VIRT_VEC2T_XY,
-   REG_TYPE_VIRT_VEC2T_ZW,
-   REG_TYPE_VIRT_VEC2C_XY,
-   REG_TYPE_VIRT_VEC2C_YZ,
-   REG_TYPE_VIRT_VEC2C_ZW,
-   REG_TYPE_VIRT_VEC3C_XYZ,
-   REG_TYPE_VIRT_VEC3C_YZW,
-   NUM_REG_TYPES,
-};
-
-/* writemask when used as dest */
-static const uint8_t
-reg_writemask[NUM_REG_TYPES] = {
-   [REG_TYPE_VEC4] = 0xf,
-   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
-   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
-   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
-   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
-   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
-   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
-   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
-   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
-   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
-   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
-   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
-   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
-   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
-   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
-   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
-   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
-   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
-   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
-};
-
 /* how to swizzle when used as a src */
 static const uint8_t
 reg_swiz[NUM_REG_TYPES] = {
@@ -402,62 +331,6 @@ reg_dst_swiz[NUM_REG_TYPES] = {
    [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
 };
 
-static inline int reg_get_type(int virt_reg)
-{
-   return virt_reg % NUM_REG_TYPES;
-}
-
-static inline int reg_get_base(struct etna_compile *c, int virt_reg)
-{
-   /* offset by 1 to avoid reserved position register */
-   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
-      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
-   return virt_reg / NUM_REG_TYPES;
-}
-
-/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
- * (fs registers are offset by 1 to avoid reserving r0)
- */
-#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
-
-static inline int reg_get_class(int virt_reg)
-{
-   switch (reg_get_type(virt_reg)) {
-   case REG_TYPE_VEC4:
-      return REG_CLASS_VEC4;
-   case REG_TYPE_VIRT_VEC3_XYZ:
-   case REG_TYPE_VIRT_VEC3_XYW:
-   case REG_TYPE_VIRT_VEC3_XZW:
-   case REG_TYPE_VIRT_VEC3_YZW:
-      return REG_CLASS_VIRT_VEC3;
-   case REG_TYPE_VIRT_VEC2_XY:
-   case REG_TYPE_VIRT_VEC2_XZ:
-   case REG_TYPE_VIRT_VEC2_XW:
-   case REG_TYPE_VIRT_VEC2_YZ:
-   case REG_TYPE_VIRT_VEC2_YW:
-   case REG_TYPE_VIRT_VEC2_ZW:
-      return REG_CLASS_VIRT_VEC2;
-   case REG_TYPE_VIRT_SCALAR_X:
-   case REG_TYPE_VIRT_SCALAR_Y:
-   case REG_TYPE_VIRT_SCALAR_Z:
-   case REG_TYPE_VIRT_SCALAR_W:
-      return REG_CLASS_VIRT_SCALAR;
-   case REG_TYPE_VIRT_VEC2T_XY:
-   case REG_TYPE_VIRT_VEC2T_ZW:
-      return REG_CLASS_VIRT_VEC2T;
-   case REG_TYPE_VIRT_VEC2C_XY:
-   case REG_TYPE_VIRT_VEC2C_YZ:
-   case REG_TYPE_VIRT_VEC2C_ZW:
-      return REG_CLASS_VIRT_VEC2C;
-   case REG_TYPE_VIRT_VEC3C_XYZ:
-   case REG_TYPE_VIRT_VEC3C_YZW:
-      return REG_CLASS_VIRT_VEC3C;
-   }
-
-   assert(false);
-   return 0;
-}
-
 /* nir_src to allocated register */
 static hw_src
 ra_src(struct etna_compile *c, nir_src *src)
@@ -571,183 +444,6 @@ ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
    };
 }
 
-/* precomputed by register_allocate  */
-static unsigned int *q_values[] = {
-   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
-   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
-   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
-   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
-   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
-   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
-   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
-};
-
-static void
-ra_assign(struct etna_compile *c, nir_shader *shader)
-{
-   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
-                  NUM_REG_TYPES, false);
-
-   /* classes always be created from index 0, so equal to the class enum
-    * which represents a register with (c+1) components
-    */
-   for (int c = 0; c < NUM_REG_CLASSES; c++)
-      ra_alloc_reg_class(regs);
-   /* add each register of each class */
-   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
-      ra_class_add_reg(regs, reg_get_class(r), r);
-   /* set conflicts */
-   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
-      for (int i = 0; i < NUM_REG_TYPES; i++) {
-         for (int j = 0; j < i; j++) {
-            if (reg_writemask[i] & reg_writemask[j]) {
-               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
-                                         NUM_REG_TYPES * r + j);
-            }
-         }
-      }
-   }
-   ra_set_finalize(regs, q_values);
-
-   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
-
-   /* liveness and interference */
-
-   nir_index_blocks(impl);
-   nir_index_ssa_defs(impl);
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr(instr, block)
-         instr->pass_flags = 0;
-   }
-
-   /* this gives an approximation/upper limit on how many nodes are needed
-    * (some ssa values do not represent an allocated register)
-    */
-   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
-   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
-   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
-   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
-
-   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
-   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
-
-   /* set classes from num_components */
-   for (unsigned i = 0; i < num_nodes; i++) {
-      nir_instr *instr = defs[i].instr;
-      nir_dest *dest = defs[i].dest;
-      unsigned comp = nir_dest_num_components(*dest) - 1;
-
-      if (instr->type == nir_instr_type_alu &&
-          c->specs->has_new_transcendentals) {
-         switch (nir_instr_as_alu(instr)->op) {
-         case nir_op_fdiv:
-         case nir_op_flog2:
-         case nir_op_fsin:
-         case nir_op_fcos:
-            assert(dest->is_ssa);
-            comp = REG_CLASS_VIRT_VEC2T;
-         default:
-            break;
-         }
-      }
-
-      if (instr->type == nir_instr_type_intrinsic) {
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         /* can't have dst swizzle or sparse writemask on UBO loads */
-         if (intr->intrinsic == nir_intrinsic_load_ubo) {
-            assert(dest == &intr->dest);
-            if (dest->ssa.num_components == 2)
-               comp = REG_CLASS_VIRT_VEC2C;
-            if (dest->ssa.num_components == 3)
-               comp = REG_CLASS_VIRT_VEC3C;
-         }
-      }
-
-      ra_set_node_class(g, i, comp);
-   }
-
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr(instr, block) {
-         if (instr->type != nir_instr_type_intrinsic)
-            continue;
-
-         nir_dest *dest = dest_for_instr(instr);
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         unsigned reg;
-
-         switch (intr->intrinsic) {
-         case nir_intrinsic_store_deref: {
-            /* don't want outputs to be swizzled
-             * TODO: better would be to set the type to X/XY/XYZ/XYZW
-             * TODO: what if fragcoord.z is read after writing fragdepth?
-             */
-            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
-            unsigned index = live_map[src_index(impl, &intr->src[1])];
-
-            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
-                deref->var->data.location == FRAG_RESULT_DEPTH) {
-               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
-            } else {
-               ra_set_node_class(g, index, REG_CLASS_VEC4);
-            }
-         } continue;
-         case nir_intrinsic_load_input:
-            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
-               REG_TYPE_VIRT_SCALAR_X,
-               REG_TYPE_VIRT_VEC2_XY,
-               REG_TYPE_VIRT_VEC3_XYZ,
-               REG_TYPE_VEC4,
-            }[nir_dest_num_components(*dest) - 1];
-            break;
-         case nir_intrinsic_load_instance_id:
-            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
-            break;
-         default:
-            continue;
-         }
-
-         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
-      }
-   }
-
-   /* add interference for intersecting live ranges */
-   for (unsigned i = 0; i < num_nodes; i++) {
-      assert(defs[i].live_start < defs[i].live_end);
-      for (unsigned j = 0; j < i; j++) {
-         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
-            continue;
-         ra_add_node_interference(g, i, j);
-      }
-   }
-
-   ralloc_free(defs);
-
-   /* Allocate registers */
-   ASSERTED bool ok = ra_allocate(g);
-   assert(ok);
-
-   c->g = g;
-   c->regs = regs;
-   c->live_map = live_map;
-   c->num_nodes = num_nodes;
-}
-
-static unsigned
-ra_finish(struct etna_compile *c)
-{
-   /* TODO: better way to get number of registers used? */
-   unsigned j = 0;
-   for (unsigned i = 0; i < c->num_nodes; i++) {
-      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
-   }
-
-   ralloc_free(c->g);
-   ralloc_free(c->regs);
-   ralloc_free(c->live_map);
-
-   return j;
-}
-
 static void
 emit_alu(struct etna_compile *c, nir_alu_instr * alu)
 {
@@ -1265,11 +961,11 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
    nir_convert_from_ssa(shader, true);
    nir_opt_dce(shader);
 
-   ra_assign(c, shader);
+   etna_ra_assign(c, shader);
 
    emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);
 
-   *num_temps = ra_finish(c);
+   *num_temps = etna_ra_finish(c);
    *num_consts = c->const_count;
    return true;
 }
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
index 8ad67cb6a7e..4808767b640 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
@@ -30,7 +30,6 @@
 #include "compiler/nir/nir.h"
 #include "etnaviv_asm.h"
 #include "etnaviv_compiler.h"
-#include "util/register_allocate.h"
 
 struct etna_compile {
    nir_shader *nir;
@@ -235,6 +234,96 @@ struct live_def {
 unsigned
 etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);
 
+/* Swizzles and write masks can be used to layer virtual non-interfering
+ * registers on top of the real VEC4 registers. For example, the virtual
+ * VEC3_XYZ register and the virtual SCALAR_W register that use the same
+ * physical VEC4 base register do not interfere.
+ */
+enum reg_class {
+   REG_CLASS_VIRT_SCALAR,
+   REG_CLASS_VIRT_VEC2,
+   REG_CLASS_VIRT_VEC3,
+   REG_CLASS_VEC4,
+   /* special vec2 class for fast transcendentals, limited to XY or ZW */
+   REG_CLASS_VIRT_VEC2T,
+   /* special classes for LOAD - contiguous components */
+   REG_CLASS_VIRT_VEC2C,
+   REG_CLASS_VIRT_VEC3C,
+   NUM_REG_CLASSES,
+};
+
+enum reg_type {
+   REG_TYPE_VEC4,
+   REG_TYPE_VIRT_VEC3_XYZ,
+   REG_TYPE_VIRT_VEC3_XYW,
+   REG_TYPE_VIRT_VEC3_XZW,
+   REG_TYPE_VIRT_VEC3_YZW,
+   REG_TYPE_VIRT_VEC2_XY,
+   REG_TYPE_VIRT_VEC2_XZ,
+   REG_TYPE_VIRT_VEC2_XW,
+   REG_TYPE_VIRT_VEC2_YZ,
+   REG_TYPE_VIRT_VEC2_YW,
+   REG_TYPE_VIRT_VEC2_ZW,
+   REG_TYPE_VIRT_SCALAR_X,
+   REG_TYPE_VIRT_SCALAR_Y,
+   REG_TYPE_VIRT_SCALAR_Z,
+   REG_TYPE_VIRT_SCALAR_W,
+   REG_TYPE_VIRT_VEC2T_XY,
+   REG_TYPE_VIRT_VEC2T_ZW,
+   REG_TYPE_VIRT_VEC2C_XY,
+   REG_TYPE_VIRT_VEC2C_YZ,
+   REG_TYPE_VIRT_VEC2C_ZW,
+   REG_TYPE_VIRT_VEC3C_XYZ,
+   REG_TYPE_VIRT_VEC3C_YZW,
+   NUM_REG_TYPES,
+};
+
+/* writemask when used as dest */
+static const uint8_t
+reg_writemask[NUM_REG_TYPES] = {
+   [REG_TYPE_VEC4] = 0xf,
+   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
+   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
+   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
+   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
+   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
+   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
+   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
+   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
+   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
+   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
+   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
+   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
+   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
+   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
+   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
+   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
+   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
+   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
+};
+
+static inline int reg_get_type(int virt_reg)
+{
+   return virt_reg % NUM_REG_TYPES;
+}
+
+static inline int reg_get_base(struct etna_compile *c, int virt_reg)
+{
+   /* offset by 1 to avoid reserved position register */
+   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
+      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
+   return virt_reg / NUM_REG_TYPES;
+}
+
+void
+etna_ra_assign(struct etna_compile *c, nir_shader *shader);
+
+unsigned
+etna_ra_finish(struct etna_compile *c);
+
 static inline void
 emit_inst(struct etna_compile *c, struct etna_inst *inst)
 {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c
new file mode 100644
index 00000000000..b322f035279
--- /dev/null
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2019 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan at marek.ca>
+ */
+
+#include "etnaviv_compiler_nir.h"
+#include "util/register_allocate.h"
+
+/* Use "r63.z" as the depth register: reg_get_base wraps it around to r0.z
+ * (fragment shader registers are offset by 1 to avoid reserving r0).
+ */
+#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
+
+/* q values passed to ra_set_finalize(), precomputed by register_allocate */
+static unsigned int *q_values[] = {
+   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
+   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
+   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
+   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
+   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
+   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
+   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
+};
+
+static inline int reg_get_class(int virt_reg)
+{
+   switch (reg_get_type(virt_reg)) {
+   case REG_TYPE_VEC4:
+      return REG_CLASS_VEC4;
+   case REG_TYPE_VIRT_VEC3_XYZ:
+   case REG_TYPE_VIRT_VEC3_XYW:
+   case REG_TYPE_VIRT_VEC3_XZW:
+   case REG_TYPE_VIRT_VEC3_YZW:
+      return REG_CLASS_VIRT_VEC3;
+   case REG_TYPE_VIRT_VEC2_XY:
+   case REG_TYPE_VIRT_VEC2_XZ:
+   case REG_TYPE_VIRT_VEC2_XW:
+   case REG_TYPE_VIRT_VEC2_YZ:
+   case REG_TYPE_VIRT_VEC2_YW:
+   case REG_TYPE_VIRT_VEC2_ZW:
+      return REG_CLASS_VIRT_VEC2;
+   case REG_TYPE_VIRT_SCALAR_X:
+   case REG_TYPE_VIRT_SCALAR_Y:
+   case REG_TYPE_VIRT_SCALAR_Z:
+   case REG_TYPE_VIRT_SCALAR_W:
+      return REG_CLASS_VIRT_SCALAR;
+   case REG_TYPE_VIRT_VEC2T_XY:
+   case REG_TYPE_VIRT_VEC2T_ZW:
+      return REG_CLASS_VIRT_VEC2T;
+   case REG_TYPE_VIRT_VEC2C_XY:
+   case REG_TYPE_VIRT_VEC2C_YZ:
+   case REG_TYPE_VIRT_VEC2C_ZW:
+      return REG_CLASS_VIRT_VEC2C;
+   case REG_TYPE_VIRT_VEC3C_XYZ:
+   case REG_TYPE_VIRT_VEC3C_YZW:
+      return REG_CLASS_VIRT_VEC3C;
+   }
+
+   assert(false);
+   return 0;
+}
+
+void
+etna_ra_assign(struct etna_compile *c, nir_shader *shader)
+{
+   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
+                  NUM_REG_TYPES, false);
+
+   /* classes are always created starting from index 0, so each class index
+    * equals its reg_class enum value (class c has c+1 components up to VEC4)
+    */
+   for (int c = 0; c < NUM_REG_CLASSES; c++)
+      ra_alloc_reg_class(regs);
+   /* add each register of each class */
+   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
+      ra_class_add_reg(regs, reg_get_class(r), r);
+   /* set conflicts */
+   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
+      for (int i = 0; i < NUM_REG_TYPES; i++) {
+         for (int j = 0; j < i; j++) {
+            if (reg_writemask[i] & reg_writemask[j]) {
+               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
+                                         NUM_REG_TYPES * r + j);
+            }
+         }
+      }
+   }
+   ra_set_finalize(regs, q_values);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   /* liveness and interference */
+
+   nir_index_blocks(impl);
+   nir_index_ssa_defs(impl);
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block)
+         instr->pass_flags = 0;
+   }
+
+   /* this gives an approximation/upper limit on how many nodes are needed
+    * (some ssa values do not represent an allocated register)
+    */
+   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
+   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
+   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
+   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
+
+   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
+   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
+
+   /* set classes from num_components */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      nir_instr *instr = defs[i].instr;
+      nir_dest *dest = defs[i].dest;
+      unsigned comp = nir_dest_num_components(*dest) - 1;
+
+      if (instr->type == nir_instr_type_alu &&
+          c->specs->has_new_transcendentals) {
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fdiv:
+         case nir_op_flog2:
+         case nir_op_fsin:
+         case nir_op_fcos:
+            assert(dest->is_ssa);
+            comp = REG_CLASS_VIRT_VEC2T;
+         default:
+            break;
+         }
+      }
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         /* can't have dst swizzle or sparse writemask on UBO loads */
+         if (intr->intrinsic == nir_intrinsic_load_ubo) {
+            assert(dest == &intr->dest);
+            if (dest->ssa.num_components == 2)
+               comp = REG_CLASS_VIRT_VEC2C;
+            if (dest->ssa.num_components == 3)
+               comp = REG_CLASS_VIRT_VEC3C;
+         }
+      }
+
+      ra_set_node_class(g, i, comp);
+   }
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_dest *dest = dest_for_instr(instr);
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         unsigned reg;
+
+         switch (intr->intrinsic) {
+         case nir_intrinsic_store_deref: {
+            /* don't want outputs to be swizzled
+             * TODO: better would be to set the type to X/XY/XYZ/XYZW
+             * TODO: what if fragcoord.z is read after writing fragdepth?
+             */
+            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+            unsigned index = live_map[src_index(impl, &intr->src[1])];
+
+            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
+                deref->var->data.location == FRAG_RESULT_DEPTH) {
+               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
+            } else {
+               ra_set_node_class(g, index, REG_CLASS_VEC4);
+            }
+         } continue;
+         case nir_intrinsic_load_input:
+            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
+               REG_TYPE_VIRT_SCALAR_X,
+               REG_TYPE_VIRT_VEC2_XY,
+               REG_TYPE_VIRT_VEC3_XYZ,
+               REG_TYPE_VEC4,
+            }[nir_dest_num_components(*dest) - 1];
+            break;
+         case nir_intrinsic_load_instance_id:
+            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
+            break;
+         default:
+            continue;
+         }
+
+         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
+      }
+   }
+
+   /* add interference for intersecting live ranges */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      assert(defs[i].live_start < defs[i].live_end);
+      for (unsigned j = 0; j < i; j++) {
+         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
+            continue;
+         ra_add_node_interference(g, i, j);
+      }
+   }
+
+   ralloc_free(defs);
+
+   /* Allocate registers */
+   ASSERTED bool ok = ra_allocate(g);
+   assert(ok);
+
+   c->g = g;
+   c->regs = regs;
+   c->live_map = live_map;
+   c->num_nodes = num_nodes;
+}
+
+unsigned
+etna_ra_finish(struct etna_compile *c)
+{
+   /* TODO: better way to get number of registers used? */
+   unsigned j = 0;
+   for (unsigned i = 0; i < c->num_nodes; i++) {
+      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
+   }
+
+   ralloc_free(c->g);
+   ralloc_free(c->regs);
+   ralloc_free(c->live_map);
+
+   return j;
+}
diff --git a/src/gallium/drivers/etnaviv/meson.build b/src/gallium/drivers/etnaviv/meson.build
index 93199abe05a..7eaa4cca92c 100644
--- a/src/gallium/drivers/etnaviv/meson.build
+++ b/src/gallium/drivers/etnaviv/meson.build
@@ -39,6 +39,7 @@ files_etnaviv = files(
   'etnaviv_compiler_nir.c',
   'etnaviv_compiler_nir_emit.c',
   'etnaviv_compiler_nir_liveness.c',
+  'etnaviv_compiler_nir_ra.c',
   'etnaviv_compiler_tgsi.c',
   'etnaviv_context.c',
   'etnaviv_context.h',



More information about the mesa-commit mailing list