[Mesa-dev] [PATCH 1/9] gallium: add opcode and types for 64-bit integers. (v2)

Roland Scheidegger sroland at vmware.com
Fri Sep 16 15:34:50 UTC 2016


Am 16.09.2016 um 15:48 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
> 
> This just adds the basic support for 64-bit opcodes,
> and the new types.
> 
> v2: add conversion opcodes.
> add documentation.
> 
> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/auxiliary/tgsi/tgsi_info.c     |  92 +++++++++--
>  src/gallium/auxiliary/tgsi/tgsi_info.h     |   4 +-
>  src/gallium/docs/source/tgsi.rst           | 246 +++++++++++++++++++++++++++++
>  src/gallium/include/pipe/p_shader_tokens.h |  46 ++++--
>  4 files changed, 368 insertions(+), 20 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 60e0f2c..e319be1 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
>     { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I2U64", TGSI_OPCODE_I2U64 },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
>     { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
>     { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 },      /* removed */
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
>     { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
>     { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 },      /* removed */
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
> @@ -258,20 +258,42 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
>  };
>  
>  const struct tgsi_opcode_info *
>  tgsi_get_opcode_info( uint opcode )
>  {
>     static boolean firsttime = 1;
>  
>     if (firsttime) {
>        unsigned i;
>        firsttime = 0;
> @@ -375,20 +397,26 @@ tgsi_opcode_infer_type( uint opcode )
>     case TGSI_OPCODE_UARL:
>     case TGSI_OPCODE_IABS:
>     case TGSI_OPCODE_ISSG:
>     case TGSI_OPCODE_IMUL_HI:
>     case TGSI_OPCODE_IBFE:
>     case TGSI_OPCODE_IMSB:
>     case TGSI_OPCODE_DSEQ:
>     case TGSI_OPCODE_DSGE:
>     case TGSI_OPCODE_DSLT:
>     case TGSI_OPCODE_DSNE:
> +   case TGSI_OPCODE_U64SEQ:
> +   case TGSI_OPCODE_U64SNE:
> +   case TGSI_OPCODE_U64SLT:
> +   case TGSI_OPCODE_U64SGE:
> +   case TGSI_OPCODE_I64SLT:
> +   case TGSI_OPCODE_I64SGE:
>        return TGSI_TYPE_SIGNED;
>     case TGSI_OPCODE_DADD:
>     case TGSI_OPCODE_DABS:
>     case TGSI_OPCODE_DFMA:
>     case TGSI_OPCODE_DNEG:
>     case TGSI_OPCODE_DMUL:
>     case TGSI_OPCODE_DMAX:
>     case TGSI_OPCODE_DMIN:
>     case TGSI_OPCODE_DRCP:
>     case TGSI_OPCODE_DSQRT:
> @@ -398,21 +426,47 @@ tgsi_opcode_infer_type( uint opcode )
>     case TGSI_OPCODE_DFRAC:
>     case TGSI_OPCODE_DRSQ:
>     case TGSI_OPCODE_DTRUNC:
>     case TGSI_OPCODE_DCEIL:
>     case TGSI_OPCODE_DFLR:
>     case TGSI_OPCODE_DROUND:
>     case TGSI_OPCODE_DSSG:
>     case TGSI_OPCODE_F2D:
>     case TGSI_OPCODE_I2D:
>     case TGSI_OPCODE_U2D:
> +   case TGSI_OPCODE_U642D:
> +   case TGSI_OPCODE_I642D:
>        return TGSI_TYPE_DOUBLE;
> +   case TGSI_OPCODE_U64MAX:
> +   case TGSI_OPCODE_U64MIN:
> +   case TGSI_OPCODE_U64ADD:
> +   case TGSI_OPCODE_U64MUL:
> +   case TGSI_OPCODE_U64DIV:
> +   case TGSI_OPCODE_U64MOD:
> +   case TGSI_OPCODE_U64SHL:
> +   case TGSI_OPCODE_U64SHR:
> +   case TGSI_OPCODE_F2U64:
> +   case TGSI_OPCODE_I2U64:
> +   case TGSI_OPCODE_D2U64:
> +      return TGSI_TYPE_UNSIGNED64;
> +   case TGSI_OPCODE_I64MAX:
> +   case TGSI_OPCODE_I64MIN:
> +   case TGSI_OPCODE_I64ABS:
> +   case TGSI_OPCODE_I64SSG:
> +   case TGSI_OPCODE_I64NEG:
> +   case TGSI_OPCODE_I64SHR:
> +   case TGSI_OPCODE_I64DIV:
> +   case TGSI_OPCODE_I64MOD:
> +   case TGSI_OPCODE_F2I64:
> +   case TGSI_OPCODE_I2I64:
> +   case TGSI_OPCODE_D2I64:
> +      return TGSI_TYPE_SIGNED64;
>     default:
>        return TGSI_TYPE_FLOAT;
>     }
>  }
>  
>  /*
>   * infer the source type of a TGSI opcode.
>   */
>  enum tgsi_opcode_type
>  tgsi_opcode_infer_src_type( uint opcode )
> @@ -427,41 +481,59 @@ tgsi_opcode_infer_src_type( uint opcode )
>     case TGSI_OPCODE_SWITCH:
>     case TGSI_OPCODE_CASE:
>     case TGSI_OPCODE_SAMPLE_I:
>     case TGSI_OPCODE_SAMPLE_I_MS:
>     case TGSI_OPCODE_UMUL_HI:
>     case TGSI_OPCODE_UP2H:
>        return TGSI_TYPE_UNSIGNED;
>     case TGSI_OPCODE_IMUL_HI:
>     case TGSI_OPCODE_I2F:
>     case TGSI_OPCODE_I2D:
> +   case TGSI_OPCODE_I2U64:
> +   case TGSI_OPCODE_I2I64:
>        return TGSI_TYPE_SIGNED;
>     case TGSI_OPCODE_ARL:
>     case TGSI_OPCODE_ARR:
>     case TGSI_OPCODE_TXQ_LZ:
>     case TGSI_OPCODE_F2D:
>     case TGSI_OPCODE_F2I:
>     case TGSI_OPCODE_F2U:
>     case TGSI_OPCODE_FSEQ:
>     case TGSI_OPCODE_FSGE:
>     case TGSI_OPCODE_FSLT:
>     case TGSI_OPCODE_FSNE:
>     case TGSI_OPCODE_UCMP:
> +   case TGSI_OPCODE_F2U64:
> +   case TGSI_OPCODE_F2I64:
>        return TGSI_TYPE_FLOAT;
>     case TGSI_OPCODE_D2F:
>     case TGSI_OPCODE_D2U:
>     case TGSI_OPCODE_D2I:
>     case TGSI_OPCODE_DSEQ:
>     case TGSI_OPCODE_DSGE:
>     case TGSI_OPCODE_DSLT:
>     case TGSI_OPCODE_DSNE:
> +   case TGSI_OPCODE_D2U64:
> +   case TGSI_OPCODE_D2I64:
>        return TGSI_TYPE_DOUBLE;
> +   case TGSI_OPCODE_U64SEQ:
> +   case TGSI_OPCODE_U64SNE:
> +   case TGSI_OPCODE_U64SLT:
> +   case TGSI_OPCODE_U64SGE:
> +   case TGSI_OPCODE_U642F:
> +   case TGSI_OPCODE_U642D:
> +      return TGSI_TYPE_UNSIGNED64;
> +   case TGSI_OPCODE_I64SLT:
> +   case TGSI_OPCODE_I64SGE:
> +   case TGSI_OPCODE_I642F:
> +   case TGSI_OPCODE_I642D:
> +            return TGSI_TYPE_SIGNED64;
>     default:
>        return tgsi_opcode_infer_type(opcode);
>     }
>  }
>  
>  /*
>   * infer the destination type of a TGSI opcode.
>   */
>  enum tgsi_opcode_type
>  tgsi_opcode_infer_dst_type( uint opcode )
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index c43bdfd..8830f5a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -91,21 +91,23 @@ tgsi_get_opcode_name( uint opcode );
>  
>  const char *
>  tgsi_get_processor_name( uint processor );
>  
>  enum tgsi_opcode_type {
>     TGSI_TYPE_UNTYPED, /* for MOV */
>     TGSI_TYPE_VOID,
>     TGSI_TYPE_UNSIGNED,
>     TGSI_TYPE_SIGNED,
>     TGSI_TYPE_FLOAT,
> -   TGSI_TYPE_DOUBLE
> +   TGSI_TYPE_DOUBLE,
> +   TGSI_TYPE_UNSIGNED64,
> +   TGSI_TYPE_SIGNED64,
>  };
>  
>  static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
>  {
>     if (type == TGSI_TYPE_DOUBLE)
>        return true;
>     return false;
>  }
>  
>  enum tgsi_opcode_type
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 881aef6..53ee4e4 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -2075,20 +2075,266 @@ Perform a * b + c with no intermediate rounding step.
>     dst.zw = double(src0.y)
>  
>  .. opcode:: D2U - Double to Unsigned Int
>  
>  .. math::
>  
>     dst.x = unsigned(src0.xy)
>  
>     dst.y = unsigned(src0.zw)
>  
> +64-bit Integer ISA
> +^^^^^^^^^^^^^^^^^^
> +
> +The 64-bit integer opcodes reinterpret four-component vectors into
> +two-component vectors with 64-bits in each component.
> +
> +.. opcode:: I64ABS - Absolute
> +
> +  dst.xy = |src0.xy|
> +  dst.zw = |src0.zw|
> +
> +.. opcode:: I64NEG - 64-bit Integer Negate
> +
> +  Two's complement.
> +
> +.. math::
> +
> +  dst.xy = -src.xy
> +  dst.zw = -src.zw
> +
> +.. opcode:: I64SSG - 64-bit Integer Set Sign
> +
> +.. math::
> +
> +  dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
> +  dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
> +
> +.. opcode:: U64ADD - Add
> +
> +.. math::
> +
> +  dst.xy = src0.xy + src1.xy
> +  dst.zw = src0.zw + src1.zw
> +
> +.. opcode:: U64MUL - Multiply
> +
> +.. math::
> +
> +  dst.xy = src0.xy * src1.xy
> +  dst.zw = src0.zw * src1.zw
> +
> +.. opcode:: U64SEQ - Set on Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy == src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw == src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SNE - Set on Equal
Set on Not Equal.
Also, maybe mention 64-bit more consistently in the descriptions? Whether
it's mentioned or not looks totally random.







> +
> +.. math::
> +
> +  dst.x = src0.xy != src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw != src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than
> +
> +.. math::
> +
> +  dst.x = src0.xy < src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SLT - 64-bit Integer Set on Less Than
> +
> +.. math::
> +
> +  dst.x = src0.xy < src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SGE - 64-bit Integer Set on Greater Equal
> +
> +.. math::
> +
> +  dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> +  dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64MIN - Minimum of 64-bit Signed Integers
> +
> +.. math::
> +
> +  dst.xy = min(src0.xy, src1.xy)
> +  dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> +  dst.xy = min(src0.xy, src1.xy)
> +  dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: I64MAX - Maximum of 64-bit Signed Integers
> +
> +.. math::
> +
> +  dst.xy = max(src0.xy, src1.xy)
> +  dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> +  dst.xy = max(src0.xy, src1.xy)
> +  dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer
> +
> +   The shift count is masked with 0x1f before the shift is applied.
This should be 0x3f (the same applies to the other 64-bit shifts below).
(I have a feeling I already commented on that last time...)

> +
> +.. math::
> +
> +  dst.xy = src0.xy << (0x1f \& src1.x)
> +  dst.zw = src0.zw << (0x1f \& src1.y)
> +
> +.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer)
> +
> +   The shift count is masked with 0x1f before the shift is applied.
> +
> +.. math::
> +
> +  dst.xy = src0.xy >> (0x1f \& src1.x)
> +  dst.zw = src0.zw >> (0x1f \& src1.y)
> +
> +.. opcode:: U64SHR - Logical Shift Right
> +
> +   The shift count is masked with 0x1f before the shift is applied.
> +
> +.. math::
> +
> +  dst.xy = src0.xy >> (unsigned) (0x1f \& src1.x)
> +  dst.zw = src0.zw >> (unsigned) (0x1f \& src1.y)
> +
> +.. opcode:: I64DIV - 64-bit Signed Integer Division
> +
> +   TBD: behavior for division by zero.
> +
> +.. math::
> +
> +  dst.xy = src0.xy \ src1.xy
> +  dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64DIV - 64-bit Unsigned Integer Division
> +
> +   For division by zero, 0xffffffff is returned.
I suppose that should be 64 bits' worth of ones (same for U64MOD below).

> +
> +.. math::
> +
> +  dst.xy = src0.xy \ src1.xy
> +  dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder
> +
> +   If second arg is zero, 0xffffffff is returned.
> +
> +.. math::
> +
> +  dst.xy = src0.xy \bmod src1.xy
> +  dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: I64MOD - Modulus - 64-bit Signed Integer
> +
> +.. math::
> +
> +  dst.xy = src0.xy \bmod src1.xy
> +  dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: F2U64 - Float to 64-bit Unsigned Int
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.x
> +   dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: F2I64 - Float to 64-bit Int
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.x
> +   dst.zw = (int64_t) src0.y
> +
> +.. opcode:: I2U64 - Integer to 64-bit Unsigned Int
That looks odd. Sign-extending an int to 64 bits would give the same result
as I2I64. I think we'd need a U2I64 instead (or U2U64, which would be identical).

> +   This is a sign extension.
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.x
> +   dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: I2I64 - Integer to 64-bit Int
> +
> +   This is a sign extension.
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.x
> +   dst.zw = (int64_t) src0.y
> +
> +.. opcode:: D2U64 - Double to 64-bit Unsigned Int
> +
> +.. math::
> +
> +   dst.xy = (uint64_t) src0.xy
> +   dst.zw = (uint64_t) src0.zw
> +
> +.. opcode:: D2I64 - Double to 64-bit Int
> +
> +.. math::
> +
> +   dst.xy = (int64_t) src0.xy
> +   dst.zw = (int64_t) src0.zw
> +
> +.. opcode:: U642F - 64-bit unsigned integer to float
> +
> +.. math::
> +
> +   dst.x = (float) src0.xy
> +   dst.y = (float) src0.zw
> +
> +.. opcode:: I642F - 64-bit Int to Float
> +
> +.. math::
> +
> +   dst.x = (float) src0.xy
> +   dst.y = (float) src0.zw
> +
> +.. opcode:: U642D - 64-bit unsigned integer to double
> +
> +.. math::
> +
> +   dst.xy = (double) src0.xy
> +   dst.zw = (double) src0.zw
> +
> +.. opcode:: I642D - 64-bit Int to double
> +
> +.. math::
> +
> +   dst.xy = (double) src0.xy
> +   dst.zw = (double) src0.zw
> +
>  .. _samplingopcodes:
>  
>  Resource Sampling Opcodes
>  ^^^^^^^^^^^^^^^^^^^^^^^^^
>  
>  Those opcodes follow very closely semantics of the respective Direct3D
>  instructions. If in doubt double check Direct3D documentation.
>  Note that the swizzle on SVIEW (src1) determines texel swizzling
>  after lookup.
>  
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 39ce9ea..e8589cb 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -338,59 +338,61 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_MIN                 12
>  #define TGSI_OPCODE_MAX                 13
>  #define TGSI_OPCODE_SLT                 14
>  #define TGSI_OPCODE_SGE                 15
>  #define TGSI_OPCODE_MAD                 16
>  #define TGSI_OPCODE_SUB                 17
>  #define TGSI_OPCODE_LRP                 18
>  #define TGSI_OPCODE_FMA                 19
>  #define TGSI_OPCODE_SQRT                20
>  #define TGSI_OPCODE_DP2A                21
> -                                /* gap */
> +#define TGSI_OPCODE_F2U64               22
> +#define TGSI_OPCODE_F2I64               23
>  #define TGSI_OPCODE_FRC                 24
>  #define TGSI_OPCODE_CLAMP               25
>  #define TGSI_OPCODE_FLR                 26
>  #define TGSI_OPCODE_ROUND               27
>  #define TGSI_OPCODE_EX2                 28
>  #define TGSI_OPCODE_LG2                 29
>  #define TGSI_OPCODE_POW                 30
>  #define TGSI_OPCODE_XPD                 31
> -                                /* gap */
> +#define TGSI_OPCODE_I2U64               32
>  #define TGSI_OPCODE_ABS                 33
> -                                /* gap */
> +#define TGSI_OPCODE_I2I64               34
>  #define TGSI_OPCODE_DPH                 35
>  #define TGSI_OPCODE_COS                 36
>  #define TGSI_OPCODE_DDX                 37
>  #define TGSI_OPCODE_DDY                 38
>  #define TGSI_OPCODE_KILL                39 /* unconditional */
>  #define TGSI_OPCODE_PK2H                40
>  #define TGSI_OPCODE_PK2US               41
>  #define TGSI_OPCODE_PK4B                42
>  #define TGSI_OPCODE_PK4UB               43
> -                                /* gap */
> +#define TGSI_OPCODE_D2U64               44
>  #define TGSI_OPCODE_SEQ                 45
> -                                /* gap */
> +#define TGSI_OPCODE_D2I64               46
>  #define TGSI_OPCODE_SGT                 47
>  #define TGSI_OPCODE_SIN                 48
>  #define TGSI_OPCODE_SLE                 49
>  #define TGSI_OPCODE_SNE                 50
> -                                /* gap */
> +#define TGSI_OPCODE_U642D               51
>  #define TGSI_OPCODE_TEX                 52
>  #define TGSI_OPCODE_TXD                 53
>  #define TGSI_OPCODE_TXP                 54
>  #define TGSI_OPCODE_UP2H                55
>  #define TGSI_OPCODE_UP2US               56
>  #define TGSI_OPCODE_UP4B                57
>  #define TGSI_OPCODE_UP4UB               58
> -                                /* gap */
> +#define TGSI_OPCODE_U642F               59
> +#define TGSI_OPCODE_I642F               60
>  #define TGSI_OPCODE_ARR                 61
> -                                /* gap */
> +#define TGSI_OPCODE_I642D               62
>  #define TGSI_OPCODE_CAL                 63
>  #define TGSI_OPCODE_RET                 64
>  #define TGSI_OPCODE_SSG                 65 /* SGN */
>  #define TGSI_OPCODE_CMP                 66
>  #define TGSI_OPCODE_SCS                 67
>  #define TGSI_OPCODE_TXB                 68
>                                  /* gap */
>  #define TGSI_OPCODE_DIV                 70
>  #define TGSI_OPCODE_DP2                 71
>  #define TGSI_OPCODE_TXL                 72
> @@ -561,21 +563,47 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_DTRUNC              218 /* nvc0 */
>  #define TGSI_OPCODE_DCEIL               219 /* nvc0 */
>  #define TGSI_OPCODE_DFLR                220 /* nvc0 */
>  #define TGSI_OPCODE_DROUND              221 /* nvc0 */
>  #define TGSI_OPCODE_DSSG                222
>  
>  #define TGSI_OPCODE_VOTE_ANY            223
>  #define TGSI_OPCODE_VOTE_ALL            224
>  #define TGSI_OPCODE_VOTE_EQ             225
>  
> -#define TGSI_OPCODE_LAST                226
> +#define TGSI_OPCODE_U64SEQ              226
> +#define TGSI_OPCODE_U64SNE              227
> +#define TGSI_OPCODE_I64SLT              228
> +#define TGSI_OPCODE_U64SLT              229
> +#define TGSI_OPCODE_I64SGE              230
> +#define TGSI_OPCODE_U64SGE              231
> +
> +#define TGSI_OPCODE_I64MIN              232
> +#define TGSI_OPCODE_U64MIN              233
> +#define TGSI_OPCODE_I64MAX              234
> +#define TGSI_OPCODE_U64MAX              235
> +
> +#define TGSI_OPCODE_I64ABS              236
> +#define TGSI_OPCODE_I64SSG              237
> +#define TGSI_OPCODE_I64NEG              238
> +
> +#define TGSI_OPCODE_U64ADD              239
> +#define TGSI_OPCODE_U64MUL              240
> +#define TGSI_OPCODE_U64SHL              241
> +#define TGSI_OPCODE_I64SHR              242
> +#define TGSI_OPCODE_U64SHR              243
> +
> +#define TGSI_OPCODE_I64DIV              244
> +#define TGSI_OPCODE_U64DIV              245
> +#define TGSI_OPCODE_I64MOD              246
> +#define TGSI_OPCODE_U64MOD              247
> +#define TGSI_OPCODE_LAST                248
>  
>  /**
>   * Opcode is the operation code to execute. A given operation defines the
>   * semantics how the source registers (if any) are interpreted and what is
>   * written to the destination registers (if any) as a result of execution.
>   *
>   * NumDstRegs and NumSrcRegs is the number of destination and source registers,
>   * respectively. For a given operation code, those numbers are fixed and are
>   * present here only for convenience.
>   *
> 

Other than that, the "random" opcode numbers are still ugly, but
seemingly unfixable due to running out of opcodes. We might still want to
increase the opcode field to 9 bits at some point, though - there are only
around 10 opcodes left after this...

Roland



More information about the mesa-dev mailing list