[Mesa-dev] [PATCH v3] tgsi: add BALLOT/READ_* opcodes

Ilia Mirkin imirkin at alum.mit.edu
Tue Apr 4 14:48:51 UTC 2017


On Tue, Apr 4, 2017 at 10:41 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Ilia Mirkin <imirkin at alum.mit.edu>
>
> v2 (Nicolai):
> - BALLOT isn't per-channel
> - expand the documentation (also for VOTE_*)
>
> v3:
> - only BALLOT returns a 64-bit lanemask (Boyan)
> - relax the requirement on READ_INVOC: the invocation number to read
>   from must be uniform within a sub-group. This matches the
>   GL_ARB_shader_ballot spec (and the v_readlane instruction of AMD
>   GCN)
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
>  src/gallium/auxiliary/tgsi/tgsi_info.c     |  6 +--
>  src/gallium/docs/source/tgsi.rst           | 68 +++++++++++++++++++++++++-----
>  src/gallium/include/pipe/p_shader_tokens.h |  6 +--
>  3 files changed, 63 insertions(+), 17 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 5a6a9bc..30bad6d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -106,51 +106,51 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
>     { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
>     { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
>     { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
>     { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
>     { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 },      /* removed */
> +   { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC },
>     { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
>     { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
>     { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
>     { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 },      /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
>     { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
>     { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
>     { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
>     { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
>     { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
>     { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
>     { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
>     { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
>     { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST },
>     { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
>     { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
>     { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 },     /* removed */
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
>     { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 05b06ce..4c68062 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -2852,36 +2852,82 @@ only be used with 32-bit integer image formats.
>
>    The following operation is performed atomically:
>
>  .. math::
>
>    dst_x = resource[offset]
>
>    resource[offset] = (dst_x > src_x ? dst_x : src_x)
>
>
> -.. _voteopcodes:
> +.. _interlaneopcodes:
> +
> +Inter-lane opcodes
> +^^^^^^^^^^^^^^^^^^
> +
> +These opcodes reduce the given value across the shader invocations
> +running in the current SIMD group. Every thread in the subgroup will receive
> +the same result. The BALLOT operation accepts a single-channel argument that
> +is treated as a boolean and produces a 64-bit value.
> +
> +.. opcode:: VOTE_ANY - Value is set in any of the active invocations
> +
> +  Syntax: ``VOTE_ANY dst, value``
> +
> +  Example: ``VOTE_ANY TEMP[0].xy, TEMP[1].x``
> +
> +
> +.. opcode:: VOTE_ALL - Value is set in all of the active invocations
> +
> +  Syntax: ``VOTE_ALL dst, value``
> +
> +  Example: ``VOTE_ALL TEMP[0].xy, TEMP[1].x``
> +
> +
> +.. opcode:: VOTE_EQ - Value is the same in all of the active invocations
> +
> +  Syntax: ``VOTE_EQ dst, value``
> +
> +  Example: ``VOTE_EQ TEMP[0].xy, TEMP[1].x``

All of the VOTE_* examples above should use TEMP[0].x as the dst, since only BALLOT returns a 64-bit value.

> +
> +
> +.. opcode:: BALLOT - Lanemask of whether the value is set in each active
> +            invocation
> +
> +  Syntax: ``BALLOT dst, value``
> +
> +  Example: ``BALLOT TEMP[0].xy, TEMP[1].x``
> +
> +  When the argument is a constant true, this produces a bitmask of active
> +  invocations. In fragment shaders, this can include helper invocations
> +  (invocations whose outputs and writes to memory are discarded, but which
> +  are used to compute derivatives).
> +
> +
> +.. opcode:: READ_FIRST - Broadcast the value from the first active
> +            invocation to all active lanes
> +
> +  Syntax: ``READ_FIRST dst, value``
> +
> +  Example: ``READ_FIRST TEMP[0], TEMP[1]``
>
> -Vote opcodes
> -^^^^^^^^^^^^
>
> -These opcodes compare the given value across the shader invocations
> -running in the current SIMD group. The details of exactly which
> -invocations get compared are implementation-defined, and it would be a
> -correct implementation to only ever consider the current thread's
> -value. (i.e. SIMD group of 1). The argument is treated as a boolean.
> +.. opcode:: READ_INVOC - Retrieve the value from the given invocation
> +            (need not be uniform)
>
> -.. opcode:: VOTE_ANY - Value is set in any of the current invocations
> +  Syntax: ``READ_INVOC dst, value, invocation``
>
> -.. opcode:: VOTE_ALL - Value is set in all of the current invocations
> +  Example: ``READ_INVOC TEMP[0].xy, TEMP[1].xy, TEMP[2].x``
>
> -.. opcode:: VOTE_EQ - Value is the same in all of the current invocations
> +  invocation.x controls the invocation number to read from for all channels.
> +  The invocation number must be the same across all active invocations in a
> +  sub-group; otherwise, the results are undefined.
>
>
>  Explanation of symbols used
>  ------------------------------
>
>
>  Functions
>  ^^^^^^^^^^^^^^
>
>
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 8c08f27..d461f78 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -398,53 +398,53 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_CMP                 66
>  #define TGSI_OPCODE_SCS                 67
>  #define TGSI_OPCODE_TXB                 68
>  #define TGSI_OPCODE_FBFETCH             69
>  #define TGSI_OPCODE_DIV                 70
>  #define TGSI_OPCODE_DP2                 71
>  #define TGSI_OPCODE_TXL                 72
>  #define TGSI_OPCODE_BRK                 73
>  #define TGSI_OPCODE_IF                  74
>  #define TGSI_OPCODE_UIF                 75
> -                                /* gap */
> +#define TGSI_OPCODE_READ_INVOC          76
>  #define TGSI_OPCODE_ELSE                77
>  #define TGSI_OPCODE_ENDIF               78
>
>  #define TGSI_OPCODE_DDX_FINE            79
>  #define TGSI_OPCODE_DDY_FINE            80
>
>  #define TGSI_OPCODE_PUSHA               81
>  #define TGSI_OPCODE_POPA                82
>  #define TGSI_OPCODE_CEIL                83
>  #define TGSI_OPCODE_I2F                 84
>  #define TGSI_OPCODE_NOT                 85
>  #define TGSI_OPCODE_TRUNC               86
>  #define TGSI_OPCODE_SHL                 87
> -                                /* gap */
> +#define TGSI_OPCODE_BALLOT              88
>  #define TGSI_OPCODE_AND                 89
>  #define TGSI_OPCODE_OR                  90
>  #define TGSI_OPCODE_MOD                 91
>  #define TGSI_OPCODE_XOR                 92
>  #define TGSI_OPCODE_SAD                 93
>  #define TGSI_OPCODE_TXF                 94
>  #define TGSI_OPCODE_TXQ                 95
>  #define TGSI_OPCODE_CONT                96
>  #define TGSI_OPCODE_EMIT                97
>  #define TGSI_OPCODE_ENDPRIM             98
>  #define TGSI_OPCODE_BGNLOOP             99
>  #define TGSI_OPCODE_BGNSUB              100
>  #define TGSI_OPCODE_ENDLOOP             101
>  #define TGSI_OPCODE_ENDSUB              102
>  #define TGSI_OPCODE_TXQ_LZ              103 /* TXQ for mipmap level 0 */
>  #define TGSI_OPCODE_TXQS                104
>  #define TGSI_OPCODE_RESQ                105
> -                                /* gap */
> +#define TGSI_OPCODE_READ_FIRST          106
>  #define TGSI_OPCODE_NOP                 107
>
>  #define TGSI_OPCODE_FSEQ                108
>  #define TGSI_OPCODE_FSGE                109
>  #define TGSI_OPCODE_FSLT                110
>  #define TGSI_OPCODE_FSNE                111
>
>  #define TGSI_OPCODE_MEMBAR              112
>  #define TGSI_OPCODE_CALLNZ              113
>                                  /* gap */
> --
> 2.9.3
>


More information about the mesa-dev mailing list