[Mesa-dev] [PATCH] tgsi: add support for flt64 constants

Tue Dec 23 14:07:24 PST 2014

From: Dave Airlie <airlied at redhat.com>

These act like flt32, except that each value takes up two slots, so you
can only add 2 x flt64 constants to one immediate.

The main reason they are different is that we don't want to match half of a
flt64 constant against a flt32 constant in the matching code; we need to make
sure we treat both parts of the flt64 as a single structure.

Cleaned up printing/parsing by Ilia Mirkin <imirkin at alum.mit.edu>

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c     |  8 ++++
 src/gallium/auxiliary/tgsi/tgsi_parse.c    |  1 +
 src/gallium/auxiliary/tgsi/tgsi_strings.c  |  5 +-
 src/gallium/auxiliary/tgsi/tgsi_strings.h  |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c     | 22 +++++++++
 src/gallium/auxiliary/tgsi/tgsi_ureg.c     | 75 ++++++++++++++++++++++++++++--
 src/gallium/auxiliary/tgsi/tgsi_ureg.h     |  5 ++
 src/gallium/include/pipe/p_shader_tokens.h |  1 +
 8 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 972a37e..7ae4049 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -83,6 +83,7 @@ dump_enum(
 #define INSTID(I)       ctx->dump_printf( ctx, "% 3u", I )
 #define SID(I)          ctx->dump_printf( ctx, "%d", I )
 #define FLT(F)          ctx->dump_printf( ctx, "%10.4f", F )
+#define DBL(D)          ctx->dump_printf( ctx, "%10.8f", D )
 #define ENM(E,ENUMS)    dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
 const char *
@@ -238,6 +239,13 @@ dump_imm_data(struct tgsi_iterate_context *iter,
    assert( num_tokens <= 4 );
    for (i = 0; i < num_tokens; i++) {
       switch (data_type) {
+      case TGSI_IMM_FLOAT64: {
+         union di d;
+         d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+         DBL( d.d );
+         i++;
+         break;
+      }
       case TGSI_IMM_FLOAT32:
          FLT( data[i].Float );
          break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index f2370ed..d6c60aa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -148,6 +148,7 @@ tgsi_parse_token(
 
       switch (imm->Immediate.DataType) {
       case TGSI_IMM_FLOAT32:
+      case TGSI_IMM_FLOAT64:
          for (i = 0; i < imm_count; i++) {
             next_token(ctx, &imm->u[i].Float);
          }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index bd97544..9b727cf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -181,11 +181,12 @@ const char *tgsi_fs_coord_pixel_center_names[2] =
    "INTEGER"
 };
 
-const char *tgsi_immediate_type_names[3] =
+const char *tgsi_immediate_type_names[4] =
 {
    "FLT32",
    "UINT32",
-   "INT32"
+   "INT32",
+   "FLT64"
 };
 
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h b/src/gallium/auxiliary/tgsi/tgsi_strings.h
index c842746..90014a2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h
@@ -58,7 +58,7 @@ extern const char *tgsi_fs_coord_origin_names[2];
 
 extern const char *tgsi_fs_coord_pixel_center_names[2];
 
-extern const char *tgsi_immediate_type_names[3];
+extern const char *tgsi_immediate_type_names[4];
 
 
 const char *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index f965b01..5069d13 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -232,6 +232,24 @@ static boolean parse_float( const char **pcur, float *val )
    return TRUE;
 }
 
+/* Parse a double at *pcur into two 32-bit words; advances *pcur on success. */
+static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1)
+{
+   const char *cur = *pcur;
+   union {
+      double dval;
+      uint32_t uval[2];
+   } v;
+
+   /* strtod() wants char **; the cast only drops const from the cursor. */
+   v.dval = strtod(cur, (char **)pcur);
+   if (*pcur == cur)
+      return FALSE;   /* nothing consumed: not a number */
+   *val0 = v.uval[0];   /* NOTE(review): word order is host layout; confirm */
+   *val1 = v.uval[1];
+   return TRUE;
+}
+
 struct translate_ctx
 {
    const char *text;
@@ -1104,6 +1122,10 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type,
       }
 
       switch (type) {
+      case TGSI_IMM_FLOAT64:
+         ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
+         i++;
+         break;
       case TGSI_IMM_FLOAT32:
          ret = parse_float(&ctx->cur, &values[i].Float);
          break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index f524dfb..bc14cfd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -650,7 +650,48 @@ ureg_DECL_sampler_view(struct ureg_program *ureg,
 }
 
 static int
+match_or_expand_immediate64( const unsigned *v,
+                             int type,
+                             unsigned nr,
+                             unsigned *v2,
+                             unsigned *pnr2,
+                             unsigned *swizzle )
+{
+   /* 64-bit variant: v[] and v2[] are walked as (v[i], v[i+1]) pairs so a
+    * value is only ever matched or appended as a whole.  nr and *pnr2 count
+    * 32-bit words.  Returns TRUE and updates *pnr2 and *swizzle on success,
+    * FALSE (leaving *pnr2 untouched) when v2 has no room.  'type' is unused. */
+   unsigned nr2 = *pnr2;
+   unsigned i, j;
+   *swizzle = 0;
+
+   for (i = 0; i < nr; i += 2) {
+      boolean found = FALSE;
+      for (j = 0; j < nr2 && !found; j += 2) {
+         if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
+            /* Two 2-bit selectors: words i and i+1 come from j and j+1. */
+            *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
+            found = TRUE;
+         }
+      }
+      if (!found) {
+         /* Need two free words; nr2 + 2 > 4 also rejects an odd nr2. */
+         if (nr2 + 2 > 4) {
+            return FALSE;
+         }
+         v2[nr2] = v[i];
+         v2[nr2 + 1] = v[i + 1];
+         *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2));
+         nr2 += 2;
+      }
+   }
+   *pnr2 = nr2;   /* commit the new word count only on full success */
+   return TRUE;
+}
+
+static int
 match_or_expand_immediate( const unsigned *v,
+                           int type,
                            unsigned nr,
                            unsigned *v2,
                            unsigned *pnr2,
@@ -659,6 +700,9 @@ match_or_expand_immediate( const unsigned *v,
    unsigned nr2 = *pnr2;
    unsigned i, j;
 
+   if (type == TGSI_IMM_FLOAT64)
+      return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle);
+
    *swizzle = 0;
 
    for (i = 0; i < nr; i++) {
@@ -707,6 +751,7 @@ decl_immediate( struct ureg_program *ureg,
          continue;
       }
       if (match_or_expand_immediate(v,
+                                    type,
                                     nr,
                                     ureg->immediate[i].value.u,
                                     &ureg->immediate[i].nr,
@@ -719,6 +764,7 @@ decl_immediate( struct ureg_program *ureg,
       i = ureg->nr_immediates++;
       ureg->immediate[i].type = type;
       if (match_or_expand_immediate(v,
+                                    type,
                                     nr,
                                     ureg->immediate[i].value.u,
                                     &ureg->immediate[i].nr,
@@ -733,10 +779,15 @@ out:
    /* Make sure that all referenced elements are from this immediate.
     * Has the effect of making size-one immediates into scalars.
     */
-   for (j = nr; j < 4; j++) {
-      swizzle |= (swizzle & 0x3) << (j * 2);
+   if (type == TGSI_IMM_FLOAT64) {
+      for (j = nr; j < 4; j+=2) {
+         swizzle |= (swizzle & 0xf) << (j * 2);
+      }
+   } else {
+      for (j = nr; j < 4; j++) {
+         swizzle |= (swizzle & 0x3) << (j * 2);
+      }
    }
-
    return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
                        (swizzle >> 0) & 0x3,
                        (swizzle >> 2) & 0x3,
@@ -763,6 +814,24 @@ ureg_DECL_immediate( struct ureg_program *ureg,
    return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
 }
 
+/* Declare a double immediate; nr counts 32-bit words (nr / 2 doubles read). */
+struct ureg_src
+ureg_DECL_immediate_f64( struct ureg_program *ureg,
+                         const double *v,
+                         unsigned nr )
+{
+   union {
+      unsigned u[4];
+      double d[2];
+   } fu;
+   unsigned int i;
+
+   /* fu holds four words; an odd nr would expose an unset trailing word. */
+   assert(nr <= 4 && (nr % 2) == 0);
+   for (i = 0; i < nr / 2; i++)
+      fu.d[i] = v[i];
+   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
+}
 
 struct ureg_src
 ureg_DECL_immediate_uint( struct ureg_program *ureg,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index f254b1e..56c602d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -229,6 +229,11 @@ ureg_DECL_immediate( struct ureg_program *,
                      unsigned nr );
 
 struct ureg_src
+ureg_DECL_immediate_f64( struct ureg_program *,
+                         const double *v,
+                         unsigned nr );
+
+struct ureg_src
 ureg_DECL_immediate_uint( struct ureg_program *,
                           const unsigned *v,
                           unsigned nr );
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 970e168..b6542d0 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -228,6 +228,7 @@ struct tgsi_declaration_array {
 #define TGSI_IMM_FLOAT32   0
 #define TGSI_IMM_UINT32    1
 #define TGSI_IMM_INT32     2
+#define TGSI_IMM_FLOAT64   3
 
 struct tgsi_immediate
 {
-- 
1.9.3