[Mesa-dev] [PATCH 1/9] gallium: add opcode and types for 64-bit integers. (v2)
Roland Scheidegger
sroland at vmware.com
Fri Sep 16 15:34:50 UTC 2016
Am 16.09.2016 um 15:48 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
>
> This just adds the basic support for 64-bit opcodes,
> and the new types.
>
> v2: add conversion opcodes.
> add documentation.
>
> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
> Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/auxiliary/tgsi/tgsi_info.c | 92 +++++++++--
> src/gallium/auxiliary/tgsi/tgsi_info.h | 4 +-
> src/gallium/docs/source/tgsi.rst | 246 +++++++++++++++++++++++++++++
> src/gallium/include/pipe/p_shader_tokens.h | 46 ++++--
> 4 files changed, 368 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 60e0f2c..e319be1 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
> { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2U64", TGSI_OPCODE_I2U64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
> { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
> { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
> { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
> { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
> @@ -258,20 +258,42 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
> };
>
> const struct tgsi_opcode_info *
> tgsi_get_opcode_info( uint opcode )
> {
> static boolean firsttime = 1;
>
> if (firsttime) {
> unsigned i;
> firsttime = 0;
> @@ -375,20 +397,26 @@ tgsi_opcode_infer_type( uint opcode )
> case TGSI_OPCODE_UARL:
> case TGSI_OPCODE_IABS:
> case TGSI_OPCODE_ISSG:
> case TGSI_OPCODE_IMUL_HI:
> case TGSI_OPCODE_IBFE:
> case TGSI_OPCODE_IMSB:
> case TGSI_OPCODE_DSEQ:
> case TGSI_OPCODE_DSGE:
> case TGSI_OPCODE_DSLT:
> case TGSI_OPCODE_DSNE:
> + case TGSI_OPCODE_U64SEQ:
> + case TGSI_OPCODE_U64SNE:
> + case TGSI_OPCODE_U64SLT:
> + case TGSI_OPCODE_U64SGE:
> + case TGSI_OPCODE_I64SLT:
> + case TGSI_OPCODE_I64SGE:
> return TGSI_TYPE_SIGNED;
> case TGSI_OPCODE_DADD:
> case TGSI_OPCODE_DABS:
> case TGSI_OPCODE_DFMA:
> case TGSI_OPCODE_DNEG:
> case TGSI_OPCODE_DMUL:
> case TGSI_OPCODE_DMAX:
> case TGSI_OPCODE_DMIN:
> case TGSI_OPCODE_DRCP:
> case TGSI_OPCODE_DSQRT:
> @@ -398,21 +426,47 @@ tgsi_opcode_infer_type( uint opcode )
> case TGSI_OPCODE_DFRAC:
> case TGSI_OPCODE_DRSQ:
> case TGSI_OPCODE_DTRUNC:
> case TGSI_OPCODE_DCEIL:
> case TGSI_OPCODE_DFLR:
> case TGSI_OPCODE_DROUND:
> case TGSI_OPCODE_DSSG:
> case TGSI_OPCODE_F2D:
> case TGSI_OPCODE_I2D:
> case TGSI_OPCODE_U2D:
> + case TGSI_OPCODE_U642D:
> + case TGSI_OPCODE_I642D:
> return TGSI_TYPE_DOUBLE;
> + case TGSI_OPCODE_U64MAX:
> + case TGSI_OPCODE_U64MIN:
> + case TGSI_OPCODE_U64ADD:
> + case TGSI_OPCODE_U64MUL:
> + case TGSI_OPCODE_U64DIV:
> + case TGSI_OPCODE_U64MOD:
> + case TGSI_OPCODE_U64SHL:
> + case TGSI_OPCODE_U64SHR:
> + case TGSI_OPCODE_F2U64:
> + case TGSI_OPCODE_I2U64:
> + case TGSI_OPCODE_D2U64:
> + return TGSI_TYPE_UNSIGNED64;
> + case TGSI_OPCODE_I64MAX:
> + case TGSI_OPCODE_I64MIN:
> + case TGSI_OPCODE_I64ABS:
> + case TGSI_OPCODE_I64SSG:
> + case TGSI_OPCODE_I64NEG:
> + case TGSI_OPCODE_I64SHR:
> + case TGSI_OPCODE_I64DIV:
> + case TGSI_OPCODE_I64MOD:
> + case TGSI_OPCODE_F2I64:
> + case TGSI_OPCODE_I2I64:
> + case TGSI_OPCODE_D2I64:
> + return TGSI_TYPE_SIGNED64;
> default:
> return TGSI_TYPE_FLOAT;
> }
> }
>
> /*
> * infer the source type of a TGSI opcode.
> */
> enum tgsi_opcode_type
> tgsi_opcode_infer_src_type( uint opcode )
> @@ -427,41 +481,59 @@ tgsi_opcode_infer_src_type( uint opcode )
> case TGSI_OPCODE_SWITCH:
> case TGSI_OPCODE_CASE:
> case TGSI_OPCODE_SAMPLE_I:
> case TGSI_OPCODE_SAMPLE_I_MS:
> case TGSI_OPCODE_UMUL_HI:
> case TGSI_OPCODE_UP2H:
> return TGSI_TYPE_UNSIGNED;
> case TGSI_OPCODE_IMUL_HI:
> case TGSI_OPCODE_I2F:
> case TGSI_OPCODE_I2D:
> + case TGSI_OPCODE_I2U64:
> + case TGSI_OPCODE_I2I64:
> return TGSI_TYPE_SIGNED;
> case TGSI_OPCODE_ARL:
> case TGSI_OPCODE_ARR:
> case TGSI_OPCODE_TXQ_LZ:
> case TGSI_OPCODE_F2D:
> case TGSI_OPCODE_F2I:
> case TGSI_OPCODE_F2U:
> case TGSI_OPCODE_FSEQ:
> case TGSI_OPCODE_FSGE:
> case TGSI_OPCODE_FSLT:
> case TGSI_OPCODE_FSNE:
> case TGSI_OPCODE_UCMP:
> + case TGSI_OPCODE_F2U64:
> + case TGSI_OPCODE_F2I64:
> return TGSI_TYPE_FLOAT;
> case TGSI_OPCODE_D2F:
> case TGSI_OPCODE_D2U:
> case TGSI_OPCODE_D2I:
> case TGSI_OPCODE_DSEQ:
> case TGSI_OPCODE_DSGE:
> case TGSI_OPCODE_DSLT:
> case TGSI_OPCODE_DSNE:
> + case TGSI_OPCODE_D2U64:
> + case TGSI_OPCODE_D2I64:
> return TGSI_TYPE_DOUBLE;
> + case TGSI_OPCODE_U64SEQ:
> + case TGSI_OPCODE_U64SNE:
> + case TGSI_OPCODE_U64SLT:
> + case TGSI_OPCODE_U64SGE:
> + case TGSI_OPCODE_U642F:
> + case TGSI_OPCODE_U642D:
> + return TGSI_TYPE_UNSIGNED64;
> + case TGSI_OPCODE_I64SLT:
> + case TGSI_OPCODE_I64SGE:
> + case TGSI_OPCODE_I642F:
> + case TGSI_OPCODE_I642D:
> + return TGSI_TYPE_SIGNED64;
> default:
> return tgsi_opcode_infer_type(opcode);
> }
> }
>
> /*
> * infer the destination type of a TGSI opcode.
> */
> enum tgsi_opcode_type
> tgsi_opcode_infer_dst_type( uint opcode )
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index c43bdfd..8830f5a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -91,21 +91,23 @@ tgsi_get_opcode_name( uint opcode );
>
> const char *
> tgsi_get_processor_name( uint processor );
>
> enum tgsi_opcode_type {
> TGSI_TYPE_UNTYPED, /* for MOV */
> TGSI_TYPE_VOID,
> TGSI_TYPE_UNSIGNED,
> TGSI_TYPE_SIGNED,
> TGSI_TYPE_FLOAT,
> - TGSI_TYPE_DOUBLE
> + TGSI_TYPE_DOUBLE,
> + TGSI_TYPE_UNSIGNED64,
> + TGSI_TYPE_SIGNED64,
> };
>
> static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
> {
> if (type == TGSI_TYPE_DOUBLE)
> return true;
> return false;
> }
>
> enum tgsi_opcode_type
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 881aef6..53ee4e4 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -2075,20 +2075,266 @@ Perform a * b + c with no intermediate rounding step.
> dst.zw = double(src0.y)
>
> .. opcode:: D2U - Double to Unsigned Int
>
> .. math::
>
> dst.x = unsigned(src0.xy)
>
> dst.y = unsigned(src0.zw)
>
> +64-bit Integer ISA
> +^^^^^^^^^^^^^^^^^^
> +
> +The 64-bit integer opcodes reinterpret four-component vectors into
> +two-component vectors with 64-bits in each component.
> +
> +.. opcode:: I64ABS - Absolute
> +
> + dst.xy = |src0.xy|
> + dst.zw = |src0.zw|
> +
> +.. opcode:: I64NEG - 64-bit Integer Negate
> +
> + Two's complement.
> +
> +.. math::
> +
> + dst.xy = -src.xy
> + dst.zw = -src.zw
> +
> +.. opcode:: I64SSG - 64-bit Integer Set Sign
> +
> +.. math::
> +
> + dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
> + dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
> +
> +.. opcode:: U64ADD - Add
> +
> +.. math::
> +
> + dst.xy = src0.xy + src1.xy
> + dst.zw = src0.zw + src1.zw
> +
> +.. opcode:: U64MUL - Multiply
> +
> +.. math::
> +
> + dst.xy = src0.xy * src1.xy
> + dst.zw = src0.zw * src1.zw
> +
> +.. opcode:: U64SEQ - Set on Equal
> +
> +.. math::
> +
> + dst.x = src0.xy == src1.xy ? \sim 0 : 0
> + dst.z = src0.zw == src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SNE - Set on Equal
Set on Not Equal.
Also, maybe mention 64-bit more consistently in the descriptions? Whether
it's mentioned or not looks totally random.
> +
> +.. math::
> +
> + dst.x = src0.xy != src1.xy ? \sim 0 : 0
> + dst.z = src0.zw != src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than
> +
> +.. math::
> +
> + dst.x = src0.xy < src1.xy ? \sim 0 : 0
> + dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal
> +
> +.. math::
> +
> + dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> + dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SLT - 64-bit Integer Set on Less Than
> +
> +.. math::
> +
> + dst.x = src0.xy < src1.xy ? \sim 0 : 0
> + dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SGE - 64-bit Integer Set on Greater Equal
> +
> +.. math::
> +
> + dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> + dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64MIN - Minimum of 64-bit Signed Integers
> +
> +.. math::
> +
> + dst.xy = min(src0.xy, src1.xy)
> + dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> + dst.xy = min(src0.xy, src1.xy)
> + dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: I64MAX - Maximum of 64-bit Signed Integers
> +
> +.. math::
> +
> + dst.xy = max(src0.xy, src1.xy)
> + dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> + dst.xy = max(src0.xy, src1.xy)
> + dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer
> +
> + The shift count is masked with 0x1f before the shift is applied.
This should be 0x3f for 64-bit shifts (more of the same below).
(I have a feeling I already commented on that last time...)
> +
> +.. math::
> +
> + dst.xy = src0.xy << (0x1f \& src1.x)
> + dst.zw = src0.zw << (0x1f \& src1.y)
> +
> +.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer)
> +
> + The shift count is masked with 0x1f before the shift is applied.
> +
> +.. math::
> +
> + dst.xy = src0.xy >> (0x1f \& src1.x)
> + dst.zw = src0.zw >> (0x1f \& src1.y)
> +
> +.. opcode:: U64SHR - Logical Shift Right
> +
> + The shift count is masked with 0x1f before the shift is applied.
> +
> +.. math::
> +
> + dst.xy = src0.xy >> (unsigned) (0x1f \& src1.x)
> + dst.zw = src0.zw >> (unsigned) (0x1f \& src1.y)
> +
> +.. opcode:: I64DIV - 64-bit Signed Integer Division
> +
> + TBD: behavior for division by zero.
> +
> +.. math::
> +
> + dst.xy = src0.xy \ src1.xy
> + dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64DIV - 64-bit Unsigned Integer Division
> +
> + For division by zero, 0xffffffff is returned.
I suppose that should be 64 bits' worth of ones (below too).
> +
> +.. math::
> +
> + dst.xy = src0.xy \ src1.xy
> + dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder
> +
> + If second arg is zero, 0xffffffff is returned.
> +
> +.. math::
> +
> + dst.xy = src0.xy \bmod src1.xy
> + dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: I64MOD - Modulus - 64-bit Signed Integer
> +
> +.. math::
> +
> + dst.xy = src0.xy \bmod src1.xy
> + dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: F2U64 - Float to 64-bit Unsigned Int
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.x
> + dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: F2I64 - Float to 64-bit Int
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.x
> + dst.zw = (int64_t) src0.y
> +
> +.. opcode:: I2U64 - Integer to 64-bit Unsigned Int
That looks odd. Sign-extending an int to 64 bits would give the same result as I2I64.
I think we'd need a U2I64 instead (or a U2U64, which would be identical).
> + This is a sign extension.
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.x
> + dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: I2I64 - Integer to 64-bit Int
> +
> + This is a sign extension.
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.x
> + dst.zw = (int64_t) src0.y
> +
> +.. opcode:: D2U64 - Double to 64-bit Unsigned Int
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.xy
> + dst.zw = (uint64_t) src0.zw
> +
> +.. opcode:: D2I64 - Double to 64-bit Int
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.xy
> + dst.zw = (int64_t) src0.zw
> +
> +.. opcode:: U642F - 64-bit unsigned integer to float
> +
> +.. math::
> +
> + dst.x = (float) src0.xy
> + dst.y = (float) src0.zw
> +
> +.. opcode:: I642F - 64-bit Int to Float
> +
> +.. math::
> +
> + dst.x = (float) src0.xy
> + dst.y = (float) src0.zw
> +
> +.. opcode:: U642D - 64-bit unsigned integer to double
> +
> +.. math::
> +
> + dst.xy = (double) src0.xy
> + dst.zw = (double) src0.zw
> +
> +.. opcode:: I642D - 64-bit Int to double
> +
> +.. math::
> +
> + dst.xy = (double) src0.xy
> + dst.zw = (double) src0.zw
> +
> .. _samplingopcodes:
>
> Resource Sampling Opcodes
> ^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Those opcodes follow very closely semantics of the respective Direct3D
> instructions. If in doubt double check Direct3D documentation.
> Note that the swizzle on SVIEW (src1) determines texel swizzling
> after lookup.
>
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 39ce9ea..e8589cb 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -338,59 +338,61 @@ struct tgsi_property_data {
> #define TGSI_OPCODE_MIN 12
> #define TGSI_OPCODE_MAX 13
> #define TGSI_OPCODE_SLT 14
> #define TGSI_OPCODE_SGE 15
> #define TGSI_OPCODE_MAD 16
> #define TGSI_OPCODE_SUB 17
> #define TGSI_OPCODE_LRP 18
> #define TGSI_OPCODE_FMA 19
> #define TGSI_OPCODE_SQRT 20
> #define TGSI_OPCODE_DP2A 21
> - /* gap */
> +#define TGSI_OPCODE_F2U64 22
> +#define TGSI_OPCODE_F2I64 23
> #define TGSI_OPCODE_FRC 24
> #define TGSI_OPCODE_CLAMP 25
> #define TGSI_OPCODE_FLR 26
> #define TGSI_OPCODE_ROUND 27
> #define TGSI_OPCODE_EX2 28
> #define TGSI_OPCODE_LG2 29
> #define TGSI_OPCODE_POW 30
> #define TGSI_OPCODE_XPD 31
> - /* gap */
> +#define TGSI_OPCODE_I2U64 32
> #define TGSI_OPCODE_ABS 33
> - /* gap */
> +#define TGSI_OPCODE_I2I64 34
> #define TGSI_OPCODE_DPH 35
> #define TGSI_OPCODE_COS 36
> #define TGSI_OPCODE_DDX 37
> #define TGSI_OPCODE_DDY 38
> #define TGSI_OPCODE_KILL 39 /* unconditional */
> #define TGSI_OPCODE_PK2H 40
> #define TGSI_OPCODE_PK2US 41
> #define TGSI_OPCODE_PK4B 42
> #define TGSI_OPCODE_PK4UB 43
> - /* gap */
> +#define TGSI_OPCODE_D2U64 44
> #define TGSI_OPCODE_SEQ 45
> - /* gap */
> +#define TGSI_OPCODE_D2I64 46
> #define TGSI_OPCODE_SGT 47
> #define TGSI_OPCODE_SIN 48
> #define TGSI_OPCODE_SLE 49
> #define TGSI_OPCODE_SNE 50
> - /* gap */
> +#define TGSI_OPCODE_U642D 51
> #define TGSI_OPCODE_TEX 52
> #define TGSI_OPCODE_TXD 53
> #define TGSI_OPCODE_TXP 54
> #define TGSI_OPCODE_UP2H 55
> #define TGSI_OPCODE_UP2US 56
> #define TGSI_OPCODE_UP4B 57
> #define TGSI_OPCODE_UP4UB 58
> - /* gap */
> +#define TGSI_OPCODE_U642F 59
> +#define TGSI_OPCODE_I642F 60
> #define TGSI_OPCODE_ARR 61
> - /* gap */
> +#define TGSI_OPCODE_I642D 62
> #define TGSI_OPCODE_CAL 63
> #define TGSI_OPCODE_RET 64
> #define TGSI_OPCODE_SSG 65 /* SGN */
> #define TGSI_OPCODE_CMP 66
> #define TGSI_OPCODE_SCS 67
> #define TGSI_OPCODE_TXB 68
> /* gap */
> #define TGSI_OPCODE_DIV 70
> #define TGSI_OPCODE_DP2 71
> #define TGSI_OPCODE_TXL 72
> @@ -561,21 +563,47 @@ struct tgsi_property_data {
> #define TGSI_OPCODE_DTRUNC 218 /* nvc0 */
> #define TGSI_OPCODE_DCEIL 219 /* nvc0 */
> #define TGSI_OPCODE_DFLR 220 /* nvc0 */
> #define TGSI_OPCODE_DROUND 221 /* nvc0 */
> #define TGSI_OPCODE_DSSG 222
>
> #define TGSI_OPCODE_VOTE_ANY 223
> #define TGSI_OPCODE_VOTE_ALL 224
> #define TGSI_OPCODE_VOTE_EQ 225
>
> -#define TGSI_OPCODE_LAST 226
> +#define TGSI_OPCODE_U64SEQ 226
> +#define TGSI_OPCODE_U64SNE 227
> +#define TGSI_OPCODE_I64SLT 228
> +#define TGSI_OPCODE_U64SLT 229
> +#define TGSI_OPCODE_I64SGE 230
> +#define TGSI_OPCODE_U64SGE 231
> +
> +#define TGSI_OPCODE_I64MIN 232
> +#define TGSI_OPCODE_U64MIN 233
> +#define TGSI_OPCODE_I64MAX 234
> +#define TGSI_OPCODE_U64MAX 235
> +
> +#define TGSI_OPCODE_I64ABS 236
> +#define TGSI_OPCODE_I64SSG 237
> +#define TGSI_OPCODE_I64NEG 238
> +
> +#define TGSI_OPCODE_U64ADD 239
> +#define TGSI_OPCODE_U64MUL 240
> +#define TGSI_OPCODE_U64SHL 241
> +#define TGSI_OPCODE_I64SHR 242
> +#define TGSI_OPCODE_U64SHR 243
> +
> +#define TGSI_OPCODE_I64DIV 244
> +#define TGSI_OPCODE_U64DIV 245
> +#define TGSI_OPCODE_I64MOD 246
> +#define TGSI_OPCODE_U64MOD 247
> +#define TGSI_OPCODE_LAST 248
>
> /**
> * Opcode is the operation code to execute. A given operation defines the
> * semantics how the source registers (if any) are interpreted and what is
> * written to the destination registers (if any) as a result of execution.
> *
> * NumDstRegs and NumSrcRegs is the number of destination and source registers,
> * respectively. For a given operation code, those numbers are fixed and are
> * present here only for convenience.
> *
>
Other than that, the "random" opcode numbers are still ugly, but
seemingly unfixable due to running out of opcodes. We might still want to
increase the opcode width to 9 bits at some point, though - there are only
around 10 opcodes left after this...
Roland
More information about the mesa-dev
mailing list