[Mesa-dev] [PATCH 19/23] gallium/tgsi: add support for 64-bit integer immediates.

Thu Jun 9 14:43:16 UTC 2016

On 06/08/2016 06:48 PM, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds support to TGSI for 64-bit integer immediates.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>   src/gallium/auxiliary/tgsi/tgsi_dump.c     |  14 ++
>   src/gallium/auxiliary/tgsi/tgsi_exec.c     | 244 ++++++++++++++++++++++++++++-
>   src/gallium/auxiliary/tgsi/tgsi_parse.c    |   2 +
>   src/gallium/auxiliary/tgsi/tgsi_text.c     |  44 ++++++
>   src/gallium/auxiliary/tgsi/tgsi_ureg.c     |  45 +++++-
>   src/gallium/auxiliary/tgsi/tgsi_ureg.h     |  10 ++
>   src/gallium/include/pipe/p_shader_tokens.h |   2 +
>   7 files changed, 358 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> index d59b7ff..614bcb2 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> @@ -254,6 +254,20 @@ dump_imm_data(struct tgsi_iterate_context *iter,
>            i++;
>            break;
>         }
> +      case TGSI_IMM_INT64: {
> +         union di d;
> +         d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
> +         UID( d.i );
> +         i++;
> +         break;
> +      }
> +      case TGSI_IMM_UINT64: {
> +         union di d;
> +         d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
> +         UID( d.ui );
> +         i++;
> +         break;
> +      }
>         case TGSI_IMM_FLOAT32:
>            if (ctx->dump_float_as_hex)
>               HFLT( data[i].Float );
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 1457c06..c929475 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -77,6 +77,8 @@
>   union tgsi_double_channel {
>      double d[TGSI_QUAD_SIZE];
>      unsigned u[TGSI_QUAD_SIZE][2];
> +   uint64_t u64[TGSI_QUAD_SIZE];
> +   int64_t i64[TGSI_QUAD_SIZE];
>   };
>
>   struct tgsi_double_vector {
> @@ -692,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst,
>      dst->d[3] = (double)src->u[3];
>   }
>
> +static void
> +micro_i64abs(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
> +   dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
> +   dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
> +   dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
> +}
> +
> +static void
> +micro_i64sgn(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
> +   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
> +   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
> +   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
> +}
> +
> +static void
> +micro_i64neg(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = -src->i64[0];
> +   dst->i64[1] = -src->i64[1];
> +   dst->i64[2] = -src->i64[2];
> +   dst->i64[3] = -src->i64[3];
> +}
> +
> +static void
> +micro_u64seq(union tgsi_double_channel *dst,
> +           const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
> +}

I haven't been following the 'double' work, so these may be dumb questions/comments:

First, could you document the new opcodes in gallium/docs/source/tgsi.rst?

In the case of micro_u64seq(), etc., why doesn't it do

dst->u64[0] = src[0].u64[0] == src[1].u64[0] ? ~0ULL : 0ULL;

Don't we want a 64-bit boolean result?  I can imagine wanting to use the 
result of a U64SEQ instruction in some subsequent bit-wise masking 
instructions with 64-bit operands.  A 32-bit result wouldn't work for that.

Finally, it seems this patch and 22/23 could be merged.  This patch adds
a bunch of micro_*() functions that the compiler will warn about as unused
until patch 22 is applied.  Maybe the tgsi_exec.[ch] changes could be
one patch and the other tgsi changes in another.  Not a big deal though.

-Brian

> +
> +static void
> +micro_u64sne(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_i64slt(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_u64slt(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_i64sge(union tgsi_double_channel *dst,
> +           const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_u64sge(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
> +   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
> +   dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
> +   dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
> +}
> +
> +static void
> +micro_u64max(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
> +}
> +
> +static void
> +micro_i64max(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
> +   dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
> +   dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
> +   dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
> +}
> +
> +static void
> +micro_u64min(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
> +}
> +
> +static void
> +micro_i64min(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
> +   dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
> +   dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
> +   dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
> +}
> +
> +static void
> +micro_u64add(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] + src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] + src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] + src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] + src[1].u64[3];
> +}
> +
> +static void
> +micro_u64mul(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] * src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] * src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] * src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] * src[1].u64[3];
> +}
> +
> +static void
> +micro_u64div(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] / src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] / src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] / src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] / src[1].u64[3];
> +}
> +
> +static void
> +micro_i64div(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src[0].i64[0] / src[1].i64[0];
> +   dst->i64[1] = src[0].i64[1] / src[1].i64[1];
> +   dst->i64[2] = src[0].i64[2] / src[1].i64[2];
> +   dst->i64[3] = src[0].i64[3] / src[1].i64[3];
> +}
> +
> +static void
> +micro_u64mod(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->u64[0] = src[0].u64[0] % src[1].u64[0];
> +   dst->u64[1] = src[0].u64[1] % src[1].u64[1];
> +   dst->u64[2] = src[0].u64[2] % src[1].u64[2];
> +   dst->u64[3] = src[0].u64[3] % src[1].u64[3];
> +}
> +
> +static void
> +micro_i64mod(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src)
> +{
> +   dst->i64[0] = src[0].i64[0] % src[1].i64[0];
> +   dst->i64[1] = src[0].i64[1] % src[1].i64[1];
> +   dst->i64[2] = src[0].i64[2] % src[1].i64[2];
> +   dst->i64[3] = src[0].i64[3] % src[1].i64[3];
> +}
> +
> +static void
> +micro_u64shl(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src0,
> +             union tgsi_exec_channel *src1)
> +{
> +   unsigned masked_count;
> +   masked_count = src1->u[0] & 0x3f;
> +   dst->u64[0] = src0->u64[0] << masked_count;
> +   masked_count = src1->u[1] & 0x3f;
> +   dst->u64[1] = src0->u64[1] << masked_count;
> +   masked_count = src1->u[2] & 0x3f;
> +   dst->u64[2] = src0->u64[2] << masked_count;
> +   masked_count = src1->u[3] & 0x3f;
> +   dst->u64[3] = src0->u64[3] << masked_count;
> +}
> +
> +static void
> +micro_i64shr(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src0,
> +             union tgsi_exec_channel *src1)
> +{
> +   unsigned masked_count;
> +   masked_count = src1->u[0] & 0x3f;
> +   dst->i64[0] = src0->i64[0] >> masked_count;
> +   masked_count = src1->u[1] & 0x3f;
> +   dst->i64[1] = src0->i64[1] >> masked_count;
> +   masked_count = src1->u[2] & 0x3f;
> +   dst->i64[2] = src0->i64[2] >> masked_count;
> +   masked_count = src1->u[3] & 0x3f;
> +   dst->i64[3] = src0->i64[3] >> masked_count;
> +}
> +
> +static void
> +micro_u64shr(union tgsi_double_channel *dst,
> +             const union tgsi_double_channel *src0,
> +             union tgsi_exec_channel *src1)
> +{
> +   unsigned masked_count;
> +   masked_count = src1->u[0] & 0x3f;
> +   dst->u64[0] = src0->u64[0] >> masked_count;
> +   masked_count = src1->u[1] & 0x3f;
> +   dst->u64[1] = src0->u64[1] >> masked_count;
> +   masked_count = src1->u[2] & 0x3f;
> +   dst->u64[2] = src0->u64[2] >> masked_count;
> +   masked_count = src1->u[3] & 0x3f;
> +   dst->u64[3] = src0->u64[3] >> masked_count;
> +}
> +
>   enum tgsi_exec_datatype {
>      TGSI_EXEC_DATA_FLOAT,
>      TGSI_EXEC_DATA_INT,
>      TGSI_EXEC_DATA_UINT,
> -   TGSI_EXEC_DATA_DOUBLE
> +   TGSI_EXEC_DATA_DOUBLE,
> +   TGSI_EXEC_DATA_INT64,
> +   TGSI_EXEC_DATA_UINT64,
>   };
>
>   /*
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
> index 16564dd..940af7d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
> @@ -155,12 +155,14 @@ tgsi_parse_token(
>            break;
>
>         case TGSI_IMM_UINT32:
> +      case TGSI_IMM_UINT64:
>            for (i = 0; i < imm_count; i++) {
>               next_token(ctx, &imm->u[i].Uint);
>            }
>            break;
>
>         case TGSI_IMM_INT32:
> +      case TGSI_IMM_INT64:
>            for (i = 0; i < imm_count; i++) {
>               next_token(ctx, &imm->u[i].Int);
>            }
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
> index 8bdec06..be80842 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_text.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
> @@ -295,6 +295,42 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1)
>      return TRUE;
>   }
>
> +static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1)
> +{
> +   const char *cur = *pcur;
> +   union {
> +      int64_t i64val;
> +      uint32_t uval[2];
> +   } v;
> +
> +   v.i64val = strtoll(cur, (char**)pcur, 0);
> +   if (*pcur == cur)
> +      return FALSE;
> +
> +   *val0 = v.uval[0];
> +   *val1 = v.uval[1];
> +
> +   return TRUE;
> +}
> +
> +static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1)
> +{
> +   const char *cur = *pcur;
> +   union {
> +      uint64_t u64val;
> +      uint32_t uval[2];
> +   } v;
> +
> +   v.u64val = strtoull(cur, (char**)pcur, 0);
> +   if (*pcur == cur)
> +      return FALSE;
> +
> +   *val0 = v.uval[0];
> +   *val1 = v.uval[1];
> +
> +   return TRUE;
> +}
> +
>   struct translate_ctx
>   {
>      const char *text;
> @@ -1228,6 +1264,14 @@ static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type,
>            ret = parse_double(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
>            i++;
>            break;
> +      case TGSI_IMM_INT64:
> +         ret = parse_int64(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
> +         i++;
> +         break;
> +      case TGSI_IMM_UINT64:
> +         ret = parse_uint64(&ctx->cur, &values[i].Uint, &values[i+1].Uint);
> +         i++;
> +         break;
>         case TGSI_IMM_FLOAT32:
>            ret = parse_float(&ctx->cur, &values[i].Float);
>            break;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> index b67c383..6ad514d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> @@ -792,7 +792,9 @@ match_or_expand_immediate( const unsigned *v,
>      unsigned nr2 = *pnr2;
>      unsigned i, j;
>
> -   if (type == TGSI_IMM_FLOAT64)
> +   if (type == TGSI_IMM_FLOAT64 ||
> +       type == TGSI_IMM_UINT64 ||
> +       type == TGSI_IMM_INT64)
>         return match_or_expand_immediate64(v, type, nr, v2, pnr2, swizzle);
>
>      *swizzle = 0;
> @@ -871,7 +873,9 @@ out:
>      /* Make sure that all referenced elements are from this immediate.
>       * Has the effect of making size-one immediates into scalars.
>       */
> -   if (type == TGSI_IMM_FLOAT64) {
> +   if (type == TGSI_IMM_FLOAT64 ||
> +       type == TGSI_IMM_UINT64 ||
> +       type == TGSI_IMM_INT64) {
>         for (j = nr; j < 4; j+=2) {
>            swizzle |= (swizzle & 0xf) << (j * 2);
>         }
> @@ -971,6 +975,43 @@ ureg_DECL_immediate_int( struct ureg_program *ureg,
>      return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
>   }
>
> +struct ureg_src
> +ureg_DECL_immediate_uint64( struct ureg_program *ureg,
> +                            const uint64_t *v,
> +                            unsigned nr )
> +{
> +   union {
> +      unsigned u[4];
> +      uint64_t u64[2];
> +   } fu;
> +   unsigned int i;
> +
> +   assert((nr / 2) < 3);
> +   for (i = 0; i < nr / 2; i++) {
> +      fu.u64[i] = v[i];
> +   }
> +
> +   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
> +}
> +
> +struct ureg_src
> +ureg_DECL_immediate_int64( struct ureg_program *ureg,
> +                           const int64_t *v,
> +                           unsigned nr )
> +{
> +   union {
> +      unsigned u[4];
> +      int64_t i64[2];
> +   } fu;
> +   unsigned int i;
> +
> +   assert((nr / 2) < 3);
> +   for (i = 0; i < nr / 2; i++) {
> +      fu.i64[i] = v[i];
> +   }
> +
> +   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
> +}
>
>   void
>   ureg_emit_src( struct ureg_program *ureg,
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
> index b4258fd..c2c2f1a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
> @@ -271,6 +271,16 @@ ureg_DECL_immediate_int( struct ureg_program *,
>                            const int *v,
>                            unsigned nr );
>
> +struct ureg_src
> +ureg_DECL_immediate_uint64( struct ureg_program *,
> +                            const uint64_t *v,
> +                            unsigned nr );
> +
> +struct ureg_src
> +ureg_DECL_immediate_int64( struct ureg_program *,
> +                           const int64_t *v,
> +                           unsigned nr );
> +
>   void
>   ureg_DECL_constant2D(struct ureg_program *ureg,
>                        unsigned first,
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 37ce771..38e06bf 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -244,6 +244,8 @@ enum tgsi_imm_type {
>      TGSI_IMM_UINT32,
>      TGSI_IMM_INT32,
>      TGSI_IMM_FLOAT64,
> +   TGSI_IMM_UINT64,
> +   TGSI_IMM_INT64,
>   };
>
>   struct tgsi_immediate
>