[Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)
Roland Scheidegger
sroland at vmware.com
Mon Sep 19 16:18:56 UTC 2016
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie <airlied at redhat.com>
>
> This just adds the basic support for 64-bit opcodes,
> and the new types.
>
> v2: add conversion opcodes.
> add documentation.
> v3:
> - make docs more consistent
> - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64
>
> Reviewed-by: Marek Olšák <marek.olsak at amd.com> (v2)
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/auxiliary/tgsi/tgsi_info.c | 92 +++++++++--
> src/gallium/auxiliary/tgsi/tgsi_info.h | 4 +-
> src/gallium/docs/source/tgsi.rst | 240 +++++++++++++++++++++++++++++
> src/gallium/include/pipe/p_shader_tokens.h | 46 ++++--
> 4 files changed, 362 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 60e0f2c..18e1bc8 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
> { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
> - { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
> { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
> - { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
> + { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
> { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
> { 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
> { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
> @@ -258,20 +258,42 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
> { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
> + { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
> + { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
> };
>
> const struct tgsi_opcode_info *
> tgsi_get_opcode_info( uint opcode )
> {
> static boolean firsttime = 1;
>
> if (firsttime) {
> unsigned i;
> firsttime = 0;
> @@ -375,20 +397,26 @@ tgsi_opcode_infer_type( uint opcode )
> case TGSI_OPCODE_UARL:
> case TGSI_OPCODE_IABS:
> case TGSI_OPCODE_ISSG:
> case TGSI_OPCODE_IMUL_HI:
> case TGSI_OPCODE_IBFE:
> case TGSI_OPCODE_IMSB:
> case TGSI_OPCODE_DSEQ:
> case TGSI_OPCODE_DSGE:
> case TGSI_OPCODE_DSLT:
> case TGSI_OPCODE_DSNE:
> + case TGSI_OPCODE_U64SEQ:
> + case TGSI_OPCODE_U64SNE:
> + case TGSI_OPCODE_U64SLT:
> + case TGSI_OPCODE_U64SGE:
> + case TGSI_OPCODE_I64SLT:
> + case TGSI_OPCODE_I64SGE:
> return TGSI_TYPE_SIGNED;
> case TGSI_OPCODE_DADD:
> case TGSI_OPCODE_DABS:
> case TGSI_OPCODE_DFMA:
> case TGSI_OPCODE_DNEG:
> case TGSI_OPCODE_DMUL:
> case TGSI_OPCODE_DMAX:
> case TGSI_OPCODE_DMIN:
> case TGSI_OPCODE_DRCP:
> case TGSI_OPCODE_DSQRT:
> @@ -398,21 +426,47 @@ tgsi_opcode_infer_type( uint opcode )
> case TGSI_OPCODE_DFRAC:
> case TGSI_OPCODE_DRSQ:
> case TGSI_OPCODE_DTRUNC:
> case TGSI_OPCODE_DCEIL:
> case TGSI_OPCODE_DFLR:
> case TGSI_OPCODE_DROUND:
> case TGSI_OPCODE_DSSG:
> case TGSI_OPCODE_F2D:
> case TGSI_OPCODE_I2D:
> case TGSI_OPCODE_U2D:
> + case TGSI_OPCODE_U642D:
> + case TGSI_OPCODE_I642D:
> return TGSI_TYPE_DOUBLE;
> + case TGSI_OPCODE_U64MAX:
> + case TGSI_OPCODE_U64MIN:
> + case TGSI_OPCODE_U64ADD:
> + case TGSI_OPCODE_U64MUL:
> + case TGSI_OPCODE_U64DIV:
> + case TGSI_OPCODE_U64MOD:
> + case TGSI_OPCODE_U64SHL:
> + case TGSI_OPCODE_U64SHR:
> + case TGSI_OPCODE_F2U64:
> + case TGSI_OPCODE_D2U64:
> + return TGSI_TYPE_UNSIGNED64;
> + case TGSI_OPCODE_I64MAX:
> + case TGSI_OPCODE_I64MIN:
> + case TGSI_OPCODE_I64ABS:
> + case TGSI_OPCODE_I64SSG:
> + case TGSI_OPCODE_I64NEG:
> + case TGSI_OPCODE_I64SHR:
> + case TGSI_OPCODE_I64DIV:
> + case TGSI_OPCODE_I64MOD:
> + case TGSI_OPCODE_F2I64:
> + case TGSI_OPCODE_U2I64:
> + case TGSI_OPCODE_I2I64:
> + case TGSI_OPCODE_D2I64:
> + return TGSI_TYPE_SIGNED64;
> default:
> return TGSI_TYPE_FLOAT;
> }
> }
>
> /*
> * infer the source type of a TGSI opcode.
> */
> enum tgsi_opcode_type
> tgsi_opcode_infer_src_type( uint opcode )
> @@ -423,45 +477,63 @@ tgsi_opcode_infer_src_type( uint opcode )
> case TGSI_OPCODE_BREAKC:
> case TGSI_OPCODE_U2F:
> case TGSI_OPCODE_U2D:
> case TGSI_OPCODE_UADD:
> case TGSI_OPCODE_SWITCH:
> case TGSI_OPCODE_CASE:
> case TGSI_OPCODE_SAMPLE_I:
> case TGSI_OPCODE_SAMPLE_I_MS:
> case TGSI_OPCODE_UMUL_HI:
> case TGSI_OPCODE_UP2H:
> + case TGSI_OPCODE_U2I64:
> return TGSI_TYPE_UNSIGNED;
> case TGSI_OPCODE_IMUL_HI:
> case TGSI_OPCODE_I2F:
> case TGSI_OPCODE_I2D:
> + case TGSI_OPCODE_I2I64:
> return TGSI_TYPE_SIGNED;
> case TGSI_OPCODE_ARL:
> case TGSI_OPCODE_ARR:
> case TGSI_OPCODE_TXQ_LZ:
> case TGSI_OPCODE_F2D:
> case TGSI_OPCODE_F2I:
> case TGSI_OPCODE_F2U:
> case TGSI_OPCODE_FSEQ:
> case TGSI_OPCODE_FSGE:
> case TGSI_OPCODE_FSLT:
> case TGSI_OPCODE_FSNE:
> case TGSI_OPCODE_UCMP:
> + case TGSI_OPCODE_F2U64:
> + case TGSI_OPCODE_F2I64:
> return TGSI_TYPE_FLOAT;
> case TGSI_OPCODE_D2F:
> case TGSI_OPCODE_D2U:
> case TGSI_OPCODE_D2I:
> case TGSI_OPCODE_DSEQ:
> case TGSI_OPCODE_DSGE:
> case TGSI_OPCODE_DSLT:
> case TGSI_OPCODE_DSNE:
> + case TGSI_OPCODE_D2U64:
> + case TGSI_OPCODE_D2I64:
> return TGSI_TYPE_DOUBLE;
> + case TGSI_OPCODE_U64SEQ:
> + case TGSI_OPCODE_U64SNE:
> + case TGSI_OPCODE_U64SLT:
> + case TGSI_OPCODE_U64SGE:
> + case TGSI_OPCODE_U642F:
> + case TGSI_OPCODE_U642D:
> + return TGSI_TYPE_UNSIGNED64;
> + case TGSI_OPCODE_I64SLT:
> + case TGSI_OPCODE_I64SGE:
> + case TGSI_OPCODE_I642F:
> + case TGSI_OPCODE_I642D:
> + return TGSI_TYPE_SIGNED64;
> default:
> return tgsi_opcode_infer_type(opcode);
> }
> }
>
> /*
> * infer the destination type of a TGSI opcode.
> */
> enum tgsi_opcode_type
> tgsi_opcode_infer_dst_type( uint opcode )
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
> index c43bdfd..8830f5a 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
> @@ -91,21 +91,23 @@ tgsi_get_opcode_name( uint opcode );
>
> const char *
> tgsi_get_processor_name( uint processor );
>
> enum tgsi_opcode_type {
> TGSI_TYPE_UNTYPED, /* for MOV */
> TGSI_TYPE_VOID,
> TGSI_TYPE_UNSIGNED,
> TGSI_TYPE_SIGNED,
> TGSI_TYPE_FLOAT,
> - TGSI_TYPE_DOUBLE
> + TGSI_TYPE_DOUBLE,
> + TGSI_TYPE_UNSIGNED64,
> + TGSI_TYPE_SIGNED64,
> };
>
> static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
> {
> if (type == TGSI_TYPE_DOUBLE)
> return true;
> return false;
> }
>
> enum tgsi_opcode_type
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 881aef6..000ea3a 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -2075,20 +2075,260 @@ Perform a * b + c with no intermediate rounding step.
> dst.zw = double(src0.y)
>
> .. opcode:: D2U - Double to Unsigned Int
>
> .. math::
>
> dst.x = unsigned(src0.xy)
>
> dst.y = unsigned(src0.zw)
>
> +64-bit Integer ISA
> +^^^^^^^^^^^^^^^^^^
> +
> +The 64-bit integer opcodes reinterpret four-component vectors into
> +two-component vectors with 64-bits in each component.
> +
> +.. opcode:: I64ABS - 64-bit Integer Absolute Value
> +
> + dst.xy = |src0.xy|
> + dst.zw = |src0.zw|
> +
> +.. opcode:: I64NEG - 64-bit Integer Negate
> +
> + Two's complement.
> +
> +.. math::
> +
> + dst.xy = -src.xy
> + dst.zw = -src.zw
> +
> +.. opcode:: I64SSG - 64-bit Integer Set Sign
> +
> +.. math::
> +
> + dst.xy = (src0.xy < 0) ? -1 : (src0.xy > 0) ? 1 : 0
> + dst.zw = (src0.zw < 0) ? -1 : (src0.zw > 0) ? 1 : 0
> +
> +.. opcode:: U64ADD - 64-bit Integer Add
> +
> +.. math::
> +
> + dst.xy = src0.xy + src1.xy
> + dst.zw = src0.zw + src1.zw
> +
> +.. opcode:: U64MUL - 64-bit Integer Multiply
> +
> +.. math::
> +
> + dst.xy = src0.xy * src1.xy
> + dst.zw = src0.zw * src1.zw
> +
> +.. opcode:: U64SEQ - 64-bit Integer Set on Equal
> +
> +.. math::
> +
> + dst.x = src0.xy == src1.xy ? \sim 0 : 0
> + dst.z = src0.zw == src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SNE - 64-bit Integer Set on Not Equal
> +
> +.. math::
> +
> + dst.x = src0.xy != src1.xy ? \sim 0 : 0
> + dst.z = src0.zw != src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SLT - 64-bit Unsigned Integer Set on Less Than
> +
> +.. math::
> +
> + dst.x = src0.xy < src1.xy ? \sim 0 : 0
> + dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: U64SGE - 64-bit Unsigned Integer Set on Greater Equal
> +
> +.. math::
> +
> + dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> + dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SLT - 64-bit Signed Integer Set on Less Than
> +
> +.. math::
> +
> + dst.x = src0.xy < src1.xy ? \sim 0 : 0
> + dst.z = src0.zw < src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64SGE - 64-bit Signed Integer Set on Greater Equal
> +
> +.. math::
> +
> + dst.x = src0.xy >= src1.xy ? \sim 0 : 0
> + dst.z = src0.zw >= src1.zw ? \sim 0 : 0
> +
> +.. opcode:: I64MIN - Minimum of 64-bit Signed Integers
> +
> +.. math::
> +
> + dst.xy = min(src0.xy, src1.xy)
> + dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: U64MIN - Minimum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> + dst.xy = min(src0.xy, src1.xy)
> + dst.zw = min(src0.zw, src1.zw)
> +
> +.. opcode:: I64MAX - Maximum of 64-bit Signed Integers
> +
> +.. math::
> +
> + dst.xy = max(src0.xy, src1.xy)
> + dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64MAX - Maximum of 64-bit Unsigned Integers
> +
> +.. math::
> +
> + dst.xy = max(src0.xy, src1.xy)
> + dst.zw = max(src0.zw, src1.zw)
> +
> +.. opcode:: U64SHL - Shift Left 64-bit Unsigned Integer
> +
> + The shift count is masked with 0x1f before the shift is applied.
Another one: the 0x1f in the sentence above should be 0x3f :-).
Otherwise, this looks alright to me, though I'm still not sure that filling
all the opcode gaps that way is a good idea.
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
> +
> +.. math::
> +
> + dst.xy = src0.xy << (0x3f \& src1.x)
> + dst.zw = src0.zw << (0x3f \& src1.y)
> +
> +.. opcode:: I64SHR - Arithmetic Shift Right (of 64-bit Signed Integer)
> +
> + The shift count is masked with 0x3f before the shift is applied.
> +
> +.. math::
> +
> + dst.xy = src0.xy >> (0x3f \& src1.x)
> + dst.zw = src0.zw >> (0x3f \& src1.y)
> +
> +.. opcode:: U64SHR - Logical Shift Right (of 64-bit Unsigned Integer)
> +
> + The shift count is masked with 0x3f before the shift is applied.
> +
> +.. math::
> +
> + dst.xy = src0.xy >> (unsigned) (0x3f \& src1.x)
> + dst.zw = src0.zw >> (unsigned) (0x3f \& src1.y)
> +
> +.. opcode:: I64DIV - 64-bit Signed Integer Division
> +
> +.. math::
> +
> + dst.xy = src0.xy \ src1.xy
> + dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64DIV - 64-bit Unsigned Integer Division
> +
> +.. math::
> +
> + dst.xy = src0.xy \ src1.xy
> + dst.zw = src0.zw \ src1.zw
> +
> +.. opcode:: U64MOD - 64-bit Unsigned Integer Remainder
> +
> +.. math::
> +
> + dst.xy = src0.xy \bmod src1.xy
> + dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: I64MOD - 64-bit Signed Integer Remainder
> +
> +.. math::
> +
> + dst.xy = src0.xy \bmod src1.xy
> + dst.zw = src0.zw \bmod src1.zw
> +
> +.. opcode:: F2U64 - Float to 64-bit Unsigned Int
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.x
> + dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: F2I64 - Float to 64-bit Int
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.x
> + dst.zw = (int64_t) src0.y
> +
> +.. opcode:: U2I64 - Unsigned Integer to 64-bit Integer
> +
> + This is a zero extension.
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.x
> + dst.zw = (uint64_t) src0.y
> +
> +.. opcode:: I2I64 - Signed Integer to 64-bit Integer
> +
> + This is a sign extension.
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.x
> + dst.zw = (int64_t) src0.y
> +
> +.. opcode:: D2U64 - Double to 64-bit Unsigned Int
> +
> +.. math::
> +
> + dst.xy = (uint64_t) src0.xy
> + dst.zw = (uint64_t) src0.zw
> +
> +.. opcode:: D2I64 - Double to 64-bit Int
> +
> +.. math::
> +
> + dst.xy = (int64_t) src0.xy
> + dst.zw = (int64_t) src0.zw
> +
> +.. opcode:: U642F - 64-bit unsigned integer to float
> +
> +.. math::
> +
> + dst.x = (float) src0.xy
> + dst.y = (float) src0.zw
> +
> +.. opcode:: I642F - 64-bit Int to Float
> +
> +.. math::
> +
> + dst.x = (float) src0.xy
> + dst.y = (float) src0.zw
> +
> +.. opcode:: U642D - 64-bit unsigned integer to double
> +
> +.. math::
> +
> + dst.xy = (double) src0.xy
> + dst.zw = (double) src0.zw
> +
> +.. opcode:: I642D - 64-bit Int to double
> +
> +.. math::
> +
> + dst.xy = (double) src0.xy
> + dst.zw = (double) src0.zw
> +
> .. _samplingopcodes:
>
> Resource Sampling Opcodes
> ^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Those opcodes follow very closely semantics of the respective Direct3D
> instructions. If in doubt double check Direct3D documentation.
> Note that the swizzle on SVIEW (src1) determines texel swizzling
> after lookup.
>
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 39ce9ea..a8d323a 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -338,59 +338,61 @@ struct tgsi_property_data {
> #define TGSI_OPCODE_MIN 12
> #define TGSI_OPCODE_MAX 13
> #define TGSI_OPCODE_SLT 14
> #define TGSI_OPCODE_SGE 15
> #define TGSI_OPCODE_MAD 16
> #define TGSI_OPCODE_SUB 17
> #define TGSI_OPCODE_LRP 18
> #define TGSI_OPCODE_FMA 19
> #define TGSI_OPCODE_SQRT 20
> #define TGSI_OPCODE_DP2A 21
> - /* gap */
> +#define TGSI_OPCODE_F2U64 22
> +#define TGSI_OPCODE_F2I64 23
> #define TGSI_OPCODE_FRC 24
> #define TGSI_OPCODE_CLAMP 25
> #define TGSI_OPCODE_FLR 26
> #define TGSI_OPCODE_ROUND 27
> #define TGSI_OPCODE_EX2 28
> #define TGSI_OPCODE_LG2 29
> #define TGSI_OPCODE_POW 30
> #define TGSI_OPCODE_XPD 31
> - /* gap */
> +#define TGSI_OPCODE_U2I64 32
> #define TGSI_OPCODE_ABS 33
> - /* gap */
> +#define TGSI_OPCODE_I2I64 34
> #define TGSI_OPCODE_DPH 35
> #define TGSI_OPCODE_COS 36
> #define TGSI_OPCODE_DDX 37
> #define TGSI_OPCODE_DDY 38
> #define TGSI_OPCODE_KILL 39 /* unconditional */
> #define TGSI_OPCODE_PK2H 40
> #define TGSI_OPCODE_PK2US 41
> #define TGSI_OPCODE_PK4B 42
> #define TGSI_OPCODE_PK4UB 43
> - /* gap */
> +#define TGSI_OPCODE_D2U64 44
> #define TGSI_OPCODE_SEQ 45
> - /* gap */
> +#define TGSI_OPCODE_D2I64 46
> #define TGSI_OPCODE_SGT 47
> #define TGSI_OPCODE_SIN 48
> #define TGSI_OPCODE_SLE 49
> #define TGSI_OPCODE_SNE 50
> - /* gap */
> +#define TGSI_OPCODE_U642D 51
> #define TGSI_OPCODE_TEX 52
> #define TGSI_OPCODE_TXD 53
> #define TGSI_OPCODE_TXP 54
> #define TGSI_OPCODE_UP2H 55
> #define TGSI_OPCODE_UP2US 56
> #define TGSI_OPCODE_UP4B 57
> #define TGSI_OPCODE_UP4UB 58
> - /* gap */
> +#define TGSI_OPCODE_U642F 59
> +#define TGSI_OPCODE_I642F 60
> #define TGSI_OPCODE_ARR 61
> - /* gap */
> +#define TGSI_OPCODE_I642D 62
> #define TGSI_OPCODE_CAL 63
> #define TGSI_OPCODE_RET 64
> #define TGSI_OPCODE_SSG 65 /* SGN */
> #define TGSI_OPCODE_CMP 66
> #define TGSI_OPCODE_SCS 67
> #define TGSI_OPCODE_TXB 68
> /* gap */
> #define TGSI_OPCODE_DIV 70
> #define TGSI_OPCODE_DP2 71
> #define TGSI_OPCODE_TXL 72
> @@ -561,21 +563,47 @@ struct tgsi_property_data {
> #define TGSI_OPCODE_DTRUNC 218 /* nvc0 */
> #define TGSI_OPCODE_DCEIL 219 /* nvc0 */
> #define TGSI_OPCODE_DFLR 220 /* nvc0 */
> #define TGSI_OPCODE_DROUND 221 /* nvc0 */
> #define TGSI_OPCODE_DSSG 222
>
> #define TGSI_OPCODE_VOTE_ANY 223
> #define TGSI_OPCODE_VOTE_ALL 224
> #define TGSI_OPCODE_VOTE_EQ 225
>
> -#define TGSI_OPCODE_LAST 226
> +#define TGSI_OPCODE_U64SEQ 226
> +#define TGSI_OPCODE_U64SNE 227
> +#define TGSI_OPCODE_I64SLT 228
> +#define TGSI_OPCODE_U64SLT 229
> +#define TGSI_OPCODE_I64SGE 230
> +#define TGSI_OPCODE_U64SGE 231
> +
> +#define TGSI_OPCODE_I64MIN 232
> +#define TGSI_OPCODE_U64MIN 233
> +#define TGSI_OPCODE_I64MAX 234
> +#define TGSI_OPCODE_U64MAX 235
> +
> +#define TGSI_OPCODE_I64ABS 236
> +#define TGSI_OPCODE_I64SSG 237
> +#define TGSI_OPCODE_I64NEG 238
> +
> +#define TGSI_OPCODE_U64ADD 239
> +#define TGSI_OPCODE_U64MUL 240
> +#define TGSI_OPCODE_U64SHL 241
> +#define TGSI_OPCODE_I64SHR 242
> +#define TGSI_OPCODE_U64SHR 243
> +
> +#define TGSI_OPCODE_I64DIV 244
> +#define TGSI_OPCODE_U64DIV 245
> +#define TGSI_OPCODE_I64MOD 246
> +#define TGSI_OPCODE_U64MOD 247
> +#define TGSI_OPCODE_LAST 248
>
> /**
> * Opcode is the operation code to execute. A given operation defines the
> * semantics how the source registers (if any) are interpreted and what is
> * written to the destination registers (if any) as a result of execution.
> *
> * NumDstRegs and NumSrcRegs is the number of destination and source registers,
> * respectively. For a given operation code, those numbers are fixed and are
> * present here only for convenience.
> *
>
More information about the mesa-dev
mailing list