[Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)

Mon Sep 19 16:18:56 UTC 2016

Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
> 
> This just adds the basic support for 64-bit opcodes,
> and the new types.
> 
> v2: add conversion opcodes.
> add documentation.
> v3:
> - make docs more consistent
> - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64
> 
> Reviewed-by: Marek Olšák <marek.olsak at amd.com> (v2)
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/auxiliary/tgsi/tgsi_info.c     |  92 +++++++++--
>  src/gallium/auxiliary/tgsi/tgsi_info.h     |   4 +-
>  src/gallium/docs/source/tgsi.rst           | 240 +++++++++++++++++++++++++++++
>  src/gallium/include/pipe/p_shader_tokens.h |  46 ++++--
>  4 files changed, 362 insertions(+), 20 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 60e0f2c..18e1bc8 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
>     { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
>     { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 },      /* removed */
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
>     { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
>     { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 },      /* removed */
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
> @@ -258,20 +258,42 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
>  };
>  
>  const struct tgsi_opcode_info *
>  tgsi_get_opcode_info( uint opcode )
>  {
>     static boolean firsttime = 1;
>  
>     if (firsttime) {
>        unsigned i;
>        firsttime = 0;
> @@ -375,20 +397,26 @@ tgsi_opcode_infer_type( uint opcode )
>     case TGSI_OPCODE_UARL:
>     case TGSI_OPCODE_IABS:
>     case TGSI_OPCODE_ISSG:
>     case TGSI_OPCODE_IMUL_HI:
>     case TGSI_OPCODE_IBFE:
>     case TGSI_OPCODE_IMSB:
>     case TGSI_OPCODE_DSEQ:
>     case TGSI_OPCODE_DSGE:
>     case TGSI_OPCODE_DSLT:
>     case TGSI_OPCODE_DSNE:
> +   case TGSI_OPCODE_U64SEQ:
> +   case TGSI_OPCODE_U64SNE:
> +   case TGSI_OPCODE_U64SLT:
> +   case TGSI_OPCODE_U64SGE:
> +   case TGSI_OPCODE_I64SLT:
> +   case TGSI_OPCODE_I64SGE:
>        return TGSI_TYPE_SIGNED;
>     case TGSI_OPCODE_DADD:
>     case TGSI_OPCODE_DABS:
>     case TGSI_OPCODE_DFMA:
>     case TGSI_OPCODE_DNEG:
>     case TGSI_OPCODE_DMUL:
>     case TGSI_OPCODE_DMAX:
>     case TGSI_OPCODE_DMIN:
>     case TGSI_OPCODE_DRCP:
>     case TGSI_OPCODE_DSQRT:
> @@ -398,21 +426,47 @@ tgsi_opcode_infer_type( uint opcode )
>     case TGSI_OPCODE_DFRAC:
>     case TGSI_OPCODE_DRSQ:
>     case TGSI_OPCODE_DTRUNC:
>     case TGSI_OPCODE_DCEIL:
>     case TGSI_OPCODE_DFLR:
>     case TGSI_OPCODE_DROUND:
>     case TGSI_OPCODE_DSSG:
>     case TGSI_OPCODE_F2D:
>     case TGSI_OPCODE_I2D:
>     case TGSI_OPCODE_U2D:
> +   case TGSI_OPCODE_U642D:
> +   case TGSI_OPCODE_I642D:
>        return TGSI_TYPE_DOUBLE;
> +   case TGSI_OPCODE_U64MAX:
> +   case TGSI_OPCODE_U64MIN:
> +   case TGSI_OPCODE_U64ADD:
> +   case TGSI_OPCODE_U64MUL:
> +   case TGSI_OPCODE_U64DIV:
> +   case TGSI_OPCODE_U64MOD:
> +   case TGSI_OPCODE_U64SHL:
> +   case TGSI_OPCODE_U64SHR:
> +   case TGSI_OPCODE_F2U64:
> +   case TGSI_OPCODE_D2U64:
> +      return TGSI_TYPE_UNSIGNED64;
> +   case TGSI_OPCODE_I64MAX:
> +   case TGSI_OPCODE_I64MIN:
> +   case TGSI_OPCODE_I64ABS:
> +   case TGSI_OPCODE_I64SSG:
> +   case TGSI_OPCODE_I64NEG:
> +   case TGSI_OPCODE_I64SHR:
> +   case TGSI_OPCODE_I64DIV:
> +   case TGSI_OPCODE_I64MOD:
> +   case TGSI_OPCODE_F2I64:
> +   case TGSI_OPCODE_U2I64:
> +   case TGSI_OPCODE_I2I64:
> +   case TGSI_OPCODE_D2I64:
> +      return TGSI_TYPE_SIGNED64;
>     default:
>        return TGSI_TYPE_FLOAT;
>     }
>  }
>  
>  /*
>   * infer the source type of a TGSI opcode.
>   */
>  enum tgsi_opcode_type
>  tgsi_opcode_infer_src_type( uint opcode )
> @@ -423,45 +477,63 @@ tgsi_opcode_infer_src_type( uint opcode )
>     case TGSI_OPCODE_BREAKC:
>     case TGSI_OPCODE_U2F:
>     case TGSI_OPCODE_U2D:
>     case TGSI_OPCODE_UADD:
>     case TGSI_OPCODE_SWITCH:
>     case TGSI_OPCODE_CASE:
>     case TGSI_OPCODE_SAMPLE_I:
>     case TGSI_OPCODE_SAMPLE_I_MS:
>     case TGSI_OPCODE_UMUL_HI:
>     case TGSI_OPCODE_UP2H:
> +   case TGSI_OPCODE_U2I64:
>        return TGSI_TYPE_UNSIGNED;
>     case TGSI_OPCODE_IMUL_HI:
>     case TGSI_OPCODE_I2F:
>     case TGSI_OPCODE_I2D:
> +   case TGSI_OPCODE_I2I64:
>        return TGSI_TYPE_SIGNED;
>     case TGSI_OPCODE_ARL:
>     case TGSI_OPCODE_ARR:
>     case TGSI_OPCODE_TXQ_LZ:
>     case TGSI_OPCODE_F2D:
>     case TGSI_OPCODE_F2I:
>     case TGSI_OPCODE_F2U:
>     case TGSI_OPCODE_FSEQ:
>     case TGSI_OPCODE_FSGE:
>     case TGSI_OPCODE_FSLT:
>     case TGSI_OPCODE_FSNE:
>     case TGSI_OPCODE_UCMP:
> +   case TGSI_OPCODE_F2U64:
> +   case TGSI_OPCODE_F2I64:
>        return TGSI_TYPE_FLOAT;
>     case TGSI_OPCODE_D2F:
>     case TGSI_OPCODE_D2U:
>     case TGSI_OPCODE_D2I:
>     case TGSI_OPCODE_DSEQ:
>     case TGSI_OPCODE_DSGE:
>     case TGSI_OPCODE_DSLT:
>     case TGSI_OPCODE_DSNE:
> +   case TGSI_OPCODE_D2U64:
> +   case TGSI_OPCODE_D2I64:
>        return TGSI_TYPE_DOUBLE;
> +   case TGSI_OPCODE_U64SEQ:
> +   case TGSI_OPCODE_U64SNE:
> +   case TGSI_OPCODE_U64SLT:
> +   case TGSI_OPCODE_U64SGE:
> +   case TGSI_OPCODE_U642F:
> +   case TGSI_OPCODE_U642D:
> +      return TGSI_TYPE_UNSIGNED64;
> +   case TGSI_OPCODE_I64SLT:
> +   case TGSI_OPCODE_I64SGE:
> +   case TGSI_OPCODE_I642F:
> +   case TGSI_OPCODE_I642D:
> +            return TGSI_TYPE_SIGNED64;
>     default:
>        return tgsi_opcode_infer_type(opcode);
>     }
>  }
>  
>  /*
>   * infer the destination type of a TGSI opcode.
>   */
>  enum tgsi_opcode_type
>  tgsi_opcode_infer_dst_type( uint opcode )
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index c43bdfd..8830f5a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -91,21 +91,23 @@ tgsi_get_opcode_name( uint opcode );
>  
>  const char *
>  tgsi_get_processor_name( uint processor );
>  
>  enum tgsi_opcode_type {
>     TGSI_TYPE_UNTYPED, /* for MOV */
>     TGSI_TYPE_VOID,
>     TGSI_TYPE_UNSIGNED,
>     TGSI_TYPE_SIGNED,
>     TGSI_TYPE_FLOAT,
> -   TGSI_TYPE_DOUBLE
> +   TGSI_TYPE_DOUBLE,
> +   TGSI_TYPE_UNSIGNED64,
> +   TGSI_TYPE_SIGNED64,
>  };
>  
>  static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
>  {
>     if (type == TGSI_TYPE_DOUBLE)
>        return true;
>     return false;
>  }
>  
>  enum tgsi_opcode_type
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 881aef6..000ea3a 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -2075,20 +2075,260 @@ Perform a * b + c with no intermediate rounding step.
>     dst.zw = double(src0.y)
>  
>  .. opcode:: D2U - Double to Unsigned Int
>  
>  .. math::
>  
>     dst.x = unsigned(src0.xy)
>  
>     dst.y = unsigned(src0.zw)
>  
> +64-bit Integer ISA
> +^^^^^^^^^^^^^^^^^^
> +
> +The 64-bit integer opcodes reinterpret four-component vectors as
> +two-component vectors with 64 bits in each component.
> +
> +.. opcode:: I64ABS - 64-bit Integer Absolute Value
> +
> +  dst.xy = |src0.xy|
> +  dst.zw = |src0.zw|
> +
> +.. opcode:: I64NEG - 64-bit Integer Negate
> +
> +  Two's complement.
> +
> +.. math::
> +
> +  dst.xy = -src.xy
> +  dst.zw = -src.zw
> +
> +.. opcode:: I64SSG - 64-bit Integer Set Sign
> +
> +.. math::
> +
> +  dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
> +  dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
> +
> +.. opcode:: U64ADD - 64-bit Integer Add
> +
> +.. math::
> +
> +  dst.xy = src0.xy + src1.xy
> +  dst.zw = src0.zw + src1.zw
> +
> +.. opcode:: U64MUL - 64-bit Integer Multiply
> +
> +.. math::
> +
> +  dst.xy = src0.xy * src1.xy
> +  dst.zw = src0.zw * src1.zw
> +
> +.. opcode:: U64SEQ - 64-bit Integer Set on Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy == src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw == src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SNE - 64-bit Integer Set on Not Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy != src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw != src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than
> +
> +.. math::
> +
> +  dst.x = src0.xy < src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SLT - 64-bit Signed Integer Set on Less Than
> +
> +.. math::
> +
> +  dst.x = src0.xy < src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SGE - 64-bit Signed Integer Set on Greater Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64MIN - Minimum of 64-bit Signed Integers
> +
> +.. math::
> +
> +  dst.xy = min(src0.xy, src1.xy)
> +  dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> +  dst.xy = min(src0.xy, src1.xy)
> +  dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: I64MAX - Maximum of 64-bit Signed Integers
> +
> +.. math::
> +
> +  dst.xy = max(src0.xy, src1.xy)
> +  dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> +  dst.xy = max(src0.xy, src1.xy)
> +  dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer
> +
> +   The shift count is masked with 0x1f before the shift is applied.
Another 0x1f that should be 0x3f (the formula below already masks with 0x3f) :-).

Otherwise, this looks alright to me, though I'm still not sure that filling
all the opcode gaps that way is a good idea.

Reviewed-by: Roland Scheidegger <sroland at vmware.com>

> +
> +.. math::
> +
> +  dst.xy = src0.xy << (0x3f \& src1.x)
> +  dst.zw = src0.zw << (0x3f \& src1.y)
> +
> +.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer)
> +
> +   The shift count is masked with 0x3f before the shift is applied.
> +
> +.. math::
> +
> +  dst.xy = src0.xy >> (0x3f \& src1.x)
> +  dst.zw = src0.zw >> (0x3f \& src1.y)
> +
> +.. opcode:: U64SHR - Logical Shift Right (of 64-bit Unsigned Integer)
> +
> +   The shift count is masked with 0x3f before the shift is applied.
> +
> +.. math::
> +
> +  dst.xy = src0.xy >> (unsigned) (0x3f \& src1.x)
> +  dst.zw = src0.zw >> (unsigned) (0x3f \& src1.y)
> +
> +.. opcode:: I64DIV - 64-bit Signed Integer Division
> +
> +.. math::
> +
> +  dst.xy = \frac{src0.xy}{src1.xy}
> +  dst.zw = \frac{src0.zw}{src1.zw}
> +
> +.. opcode:: U64DIV - 64-bit Unsigned Integer Division
> +
> +.. math::
> +
> +  dst.xy = \frac{src0.xy}{src1.xy}
> +  dst.zw = \frac{src0.zw}{src1.zw}
> +
> +.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder
> +
> +.. math::
> +
> +  dst.xy = src0.xy \bmod src1.xy
> +  dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: I64MOD - 64-bit Signed Integer Remainder
> +
> +.. math::
> +
> +  dst.xy = src0.xy \bmod src1.xy
> +  dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: F2U64 - Float to 64-bit Unsigned Int
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.x
> +   dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: F2I64 - Float to 64-bit Int
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.x
> +   dst.zw = (int64_t) src0.y
> +
> +.. opcode:: U2I64 - Unsigned Integer to 64-bit Integer
> +
> +   This is a zero extension.
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.x
> +   dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: I2I64 - Signed Integer to 64-bit Integer
> +
> +   This is a sign extension.
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.x
> +   dst.zw = (int64_t) src0.y
> +
> +.. opcode:: D2U64 - Double to 64-bit Unsigned Int
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.xy
> +   dst.zw = (uint64_t) src0.zw
> +
> +.. opcode:: D2I64 - Double to 64-bit Int
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.xy
> +   dst.zw = (int64_t) src0.zw
> +
> +.. opcode:: U642F - 64-bit unsigned integer to float
> +
> +.. math::
> +
> +   dst.x = (float) src0.xy
> +   dst.y = (float) src0.zw
> +
> +.. opcode:: I642F - 64-bit Int to Float
> +
> +.. math::
> +
> +   dst.x = (float) src0.xy
> +   dst.y = (float) src0.zw
> +
> +.. opcode:: U642D - 64-bit unsigned integer to double
> +
> +.. math::
> +
> +   dst.xy = (double) src0.xy
> +   dst.zw = (double) src0.zw
> +
> +.. opcode:: I642D - 64-bit Int to double
> +
> +.. math::
> +
> +   dst.xy = (double) src0.xy
> +   dst.zw = (double) src0.zw
> +
>  .. _samplingopcodes:
>  
>  Resource Sampling Opcodes
>  ^^^^^^^^^^^^^^^^^^^^^^^^^
>  
>  Those opcodes follow very closely semantics of the respective Direct3D
>  instructions. If in doubt double check Direct3D documentation.
>  Note that the swizzle on SVIEW (src1) determines texel swizzling
>  after lookup.
>  
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 39ce9ea..a8d323a 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -338,59 +338,61 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_MIN                 12
>  #define TGSI_OPCODE_MAX                 13
>  #define TGSI_OPCODE_SLT                 14
>  #define TGSI_OPCODE_SGE                 15
>  #define TGSI_OPCODE_MAD                 16
>  #define TGSI_OPCODE_SUB                 17
>  #define TGSI_OPCODE_LRP                 18
>  #define TGSI_OPCODE_FMA                 19
>  #define TGSI_OPCODE_SQRT                20
>  #define TGSI_OPCODE_DP2A                21
> -                                /* gap */
> +#define TGSI_OPCODE_F2U64               22
> +#define TGSI_OPCODE_F2I64               23
>  #define TGSI_OPCODE_FRC                 24
>  #define TGSI_OPCODE_CLAMP               25
>  #define TGSI_OPCODE_FLR                 26
>  #define TGSI_OPCODE_ROUND               27
>  #define TGSI_OPCODE_EX2                 28
>  #define TGSI_OPCODE_LG2                 29
>  #define TGSI_OPCODE_POW                 30
>  #define TGSI_OPCODE_XPD                 31
> -                                /* gap */
> +#define TGSI_OPCODE_U2I64               32
>  #define TGSI_OPCODE_ABS                 33
> -                                /* gap */
> +#define TGSI_OPCODE_I2I64               34
>  #define TGSI_OPCODE_DPH                 35
>  #define TGSI_OPCODE_COS                 36
>  #define TGSI_OPCODE_DDX                 37
>  #define TGSI_OPCODE_DDY                 38
>  #define TGSI_OPCODE_KILL                39 /* unconditional */
>  #define TGSI_OPCODE_PK2H                40
>  #define TGSI_OPCODE_PK2US               41
>  #define TGSI_OPCODE_PK4B                42
>  #define TGSI_OPCODE_PK4UB               43
> -                                /* gap */
> +#define TGSI_OPCODE_D2U64               44
>  #define TGSI_OPCODE_SEQ                 45
> -                                /* gap */
> +#define TGSI_OPCODE_D2I64               46
>  #define TGSI_OPCODE_SGT                 47
>  #define TGSI_OPCODE_SIN                 48
>  #define TGSI_OPCODE_SLE                 49
>  #define TGSI_OPCODE_SNE                 50
> -                                /* gap */
> +#define TGSI_OPCODE_U642D               51
>  #define TGSI_OPCODE_TEX                 52
>  #define TGSI_OPCODE_TXD                 53
>  #define TGSI_OPCODE_TXP                 54
>  #define TGSI_OPCODE_UP2H                55
>  #define TGSI_OPCODE_UP2US               56
>  #define TGSI_OPCODE_UP4B                57
>  #define TGSI_OPCODE_UP4UB               58
> -                                /* gap */
> +#define TGSI_OPCODE_U642F               59
> +#define TGSI_OPCODE_I642F               60
>  #define TGSI_OPCODE_ARR                 61
> -                                /* gap */
> +#define TGSI_OPCODE_I642D               62
>  #define TGSI_OPCODE_CAL                 63
>  #define TGSI_OPCODE_RET                 64
>  #define TGSI_OPCODE_SSG                 65 /* SGN */
>  #define TGSI_OPCODE_CMP                 66
>  #define TGSI_OPCODE_SCS                 67
>  #define TGSI_OPCODE_TXB                 68
>                                  /* gap */
>  #define TGSI_OPCODE_DIV                 70
>  #define TGSI_OPCODE_DP2                 71
>  #define TGSI_OPCODE_TXL                 72
> @@ -561,21 +563,47 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_DTRUNC              218 /* nvc0 */
>  #define TGSI_OPCODE_DCEIL               219 /* nvc0 */
>  #define TGSI_OPCODE_DFLR                220 /* nvc0 */
>  #define TGSI_OPCODE_DROUND              221 /* nvc0 */
>  #define TGSI_OPCODE_DSSG                222
>  
>  #define TGSI_OPCODE_VOTE_ANY            223
>  #define TGSI_OPCODE_VOTE_ALL            224
>  #define TGSI_OPCODE_VOTE_EQ             225
>  
> -#define TGSI_OPCODE_LAST                226
> +#define TGSI_OPCODE_U64SEQ              226
> +#define TGSI_OPCODE_U64SNE              227
> +#define TGSI_OPCODE_I64SLT              228
> +#define TGSI_OPCODE_U64SLT              229
> +#define TGSI_OPCODE_I64SGE              230
> +#define TGSI_OPCODE_U64SGE              231
> +
> +#define TGSI_OPCODE_I64MIN              232
> +#define TGSI_OPCODE_U64MIN              233
> +#define TGSI_OPCODE_I64MAX              234
> +#define TGSI_OPCODE_U64MAX              235
> +
> +#define TGSI_OPCODE_I64ABS              236
> +#define TGSI_OPCODE_I64SSG              237
> +#define TGSI_OPCODE_I64NEG              238
> +
> +#define TGSI_OPCODE_U64ADD              239
> +#define TGSI_OPCODE_U64MUL              240
> +#define TGSI_OPCODE_U64SHL              241
> +#define TGSI_OPCODE_I64SHR              242
> +#define TGSI_OPCODE_U64SHR              243
> +
> +#define TGSI_OPCODE_I64DIV              244
> +#define TGSI_OPCODE_U64DIV              245
> +#define TGSI_OPCODE_I64MOD              246
> +#define TGSI_OPCODE_U64MOD              247
> +#define TGSI_OPCODE_LAST                248
>  
>  /**
>   * Opcode is the operation code to execute. A given operation defines the
>   * semantics how the source registers (if any) are interpreted and what is
>   * written to the destination registers (if any) as a result of execution.
>   *
>   * NumDstRegs and NumSrcRegs is the number of destination and source registers,
>   * respectively. For a given operation code, those numbers are fixed and are
>   * present here only for convenience.
>   *
>