Mesa (main): freedreno/ir3: Add float immed "FLUT" support

Tue Jul 13 14:57:40 UTC 2021

Module: Mesa
Branch: main
Commit: 7f5a01a47dc87b9513aa156030bb8d03963ba40f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f5a01a47dc87b9513aa156030bb8d03963ba40f

Author: Rob Clark <robdclark at chromium.org>
Date:   Mon Jan 25 09:43:24 2021 -0800

freedreno/ir3: Add float immed "FLUT" support

We can encode a limited set of float immeds into cat2 instructions,
using hw's float lookup table (FLUT) feature.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/36
Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8705>

---

 src/freedreno/ir3/ir3.c    | 51 ++++++++++++++++++++++++++++++++++++++++++----
 src/freedreno/ir3/ir3.h    |  2 ++
 src/freedreno/ir3/ir3_cp.c | 28 +++++++++++++------------
 3 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 909228de328..41d10393895 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -31,6 +31,7 @@
 #include <string.h>
 
 #include "util/bitscan.h"
+#include "util/half_float.h"
 #include "util/ralloc.h"
 #include "util/u_math.h"
 
@@ -701,6 +702,51 @@ ir3_fixup_src_type(struct ir3_instruction *instr)
    }
 }
 
+/**
+ * Map a float immed to its index in the FLUT (float lookup table), matching
+ * raw bits (f16 if IR3_REG_HALF is set, else f32); returns -1 if not found.
+ */
+int
+ir3_flut(struct ir3_register *src_reg)
+{
+   static const struct {
+      uint32_t f32;
+      uint16_t f16;
+   } flut[] = {
+         { .f32 = 0x00000000, .f16 = 0x0000 },    /* 0.0 */
+         { .f32 = 0x3f000000, .f16 = 0x3800 },    /* 0.5 */
+         { .f32 = 0x3f800000, .f16 = 0x3c00 },    /* 1.0 */
+         { .f32 = 0x40000000, .f16 = 0x4000 },    /* 2.0 */
+         { .f32 = 0x402df854, .f16 = 0x4170 },    /* e */
+         { .f32 = 0x40490fdb, .f16 = 0x4248 },    /* pi */
+         { .f32 = 0x3ea2f983, .f16 = 0x3518 },    /* 1/pi */
+         { .f32 = 0x3f317218, .f16 = 0x398c },    /* 1/log2(e) */
+         { .f32 = 0x3fb8aa3b, .f16 = 0x3dc5 },    /* log2(e) */
+         { .f32 = 0x3e9a209b, .f16 = 0x34d1 },    /* 1/log2(10) */
+         { .f32 = 0x40549a78, .f16 = 0x42a5 },    /* log2(10) */
+         { .f32 = 0x40800000, .f16 = 0x4400 },    /* 4.0 */
+   };
+
+   if (src_reg->flags & IR3_REG_HALF) {
+      /* Half-float immeds are already lowered to 16b in nir, so match on f16: */
+      uint32_t imm = src_reg->uim_val;
+      for (unsigned i = 0; i < ARRAY_SIZE(flut); i++) {
+         if (flut[i].f16 == imm) {
+            return i;
+         }
+      }
+   } else {
+      uint32_t imm = src_reg->uim_val;
+      for (unsigned i = 0; i < ARRAY_SIZE(flut); i++) {
+         if (flut[i].f32 == imm) {
+            return i;
+         }
+      }
+   }
+
+   return -1; /* no table entry matches */
+}
+
 static unsigned
 cp_flags(unsigned flags)
 {
@@ -782,10 +828,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
       break;
    case 2:
       valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST |
-                    IR3_REG_RELATIV | IR3_REG_SHARED;
-
-      if (ir3_cat2_int(instr->opc))
-         valid_flags |= IR3_REG_IMMED;
+                    IR3_REG_RELATIV | IR3_REG_IMMED | IR3_REG_SHARED;
 
       if (flags & ~valid_flags)
          return false;
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 48f8192a702..53590ed8afb 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -700,6 +700,8 @@ void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
 void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
 void ir3_fixup_src_type(struct ir3_instruction *instr);
 
+int ir3_flut(struct ir3_register *src_reg);
+
 bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
 
 #include "util/set.h"
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index 78a5247059a..50c43f3303f 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -440,22 +440,24 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
          return true;
       }
 
-      /* NOTE: seems we can only do immed integers, so don't
-       * need to care about float.  But we do need to handle
-       * abs/neg *before* checking that the immediate requires
-       * few enough bits to encode:
-       *
-       * TODO: do we need to do something to avoid accidentally
-       * catching a float immed?
-       */
       if (src_reg->flags & IR3_REG_IMMED) {
          int32_t iim_val = src_reg->iim_val;
 
          debug_assert((opc_cat(instr->opc) == 1) ||
-                      (opc_cat(instr->opc) == 6) || is_meta(instr) ||
-                      ir3_cat2_int(instr->opc) ||
+                      (opc_cat(instr->opc) == 2) ||
+                      (opc_cat(instr->opc) == 6) ||
+                      is_meta(instr) ||
                       (is_mad(instr->opc) && (n == 0)));
 
+         if ((opc_cat(instr->opc) == 2) &&
+               !ir3_cat2_int(instr->opc)) {
+            iim_val = ir3_flut(src_reg);
+            if (iim_val < 0) {
+               /* Fall back to trying to load the immediate as a const: */
+               return lower_immed(ctx, instr, n, src_reg, new_flags);
+            }
+         }
+
          if (new_flags & IR3_REG_SABS)
             iim_val = abs(iim_val);
 
@@ -476,9 +478,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
             instr->srcs[n] = src_reg;
 
             return true;
-         } else if (lower_immed(ctx, instr, n, src_reg, new_flags)) {
-            /* Fell back to loading the immediate as a const */
-            return true;
+         } else {
+            /* Fall back to trying to load the immediate as a const: */
+            return lower_immed(ctx, instr, n, src_reg, new_flags);
          }
       }
    }