[Mesa-dev] [PATCH v3] tgsi: add BALLOT/READ_* opcodes
Nicolai Hähnle
nhaehnle at gmail.com
Tue Apr 4 14:41:26 UTC 2017
From: Ilia Mirkin <imirkin at alum.mit.edu>
v2 (Nicolai):
- BALLOT isn't per-channel
- expand the documentation (also for VOTE_*)
v3:
- only BALLOT returns a 64-bit lanemask (Boyan)
- relax the requirement on READ_INVOC: the invocation number to read
from must be uniform within a sub-group. This matches the
GL_ARB_shader_ballot spect (and the v_readlane instruction of AMD
GCN)
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/auxiliary/tgsi/tgsi_info.c | 6 +--
src/gallium/docs/source/tgsi.rst | 68 +++++++++++++++++++++++++-----
src/gallium/include/pipe/p_shader_tokens.h | 6 +--
3 files changed, 63 insertions(+), 17 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 5a6a9bc..30bad6d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,51 +106,51 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
- { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
+ { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC },
{ 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
{ 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
{ 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
{ 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
+ { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
{ 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 05b06ce..4c68062 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2852,36 +2852,82 @@ only be used with 32-bit integer image formats.
The following operation is performed atomically:
.. math::
dst_x = resource[offset]
resource[offset] = (dst_x > src_x ? dst_x : src_x)
-.. _voteopcodes:
+.. _interlaneopcodes:
+
+Inter-lane opcodes
+^^^^^^^^^^^^^^^^^^
+
+These opcodes reduce the given value across the shader invocations
+running in the current SIMD group. Every thread in the subgroup will receive
+the same result. The BALLOT operations accept a single-channel argument that
+is treated as a boolean and produce a 64-bit value.
+
+.. opcode:: VOTE_ANY - Value is set in any of the active invocations
+
+ Syntax: ``VOTE_ANY dst, value``
+
+ Example: ``VOTE_ANY TEMP[0].xy, TEMP[1].x``
+
+
+.. opcode:: VOTE_ALL - Value is set in all of the active invocations
+
+ Syntax: ``VOTE_ALL dst, value``
+
+ Example: ``VOTE_ALL TEMP[0].xy, TEMP[1].x``
+
+
+.. opcode:: VOTE_EQ - Value is the same in all of the active invocations
+
+ Syntax: ``VOTE_EQ dst, value``
+
+ Example: ``VOTE_EQ TEMP[0].xy, TEMP[1].x``
+
+
+.. opcode:: BALLOT - Lanemask of whether the value is set in each active
+ invocation
+
+ Syntax: ``BALLOT dst, value``
+
+ Example: ``BALLOT TEMP[0].xy, TEMP[1].x``
+
+ When the argument is a constant true, this produces a bitmask of active
+ invocations. In fragment shaders, this can include helper invocations
+ (invocations whose outputs and writes to memory are discarded, but which
+ are used to compute derivatives).
+
+
+.. opcode:: READ_FIRST - Broadcast the value from the first active
+ invocation to all active lanes
+
+ Syntax: ``READ_FIRST dst, value``
+
+ Example: ``READ_FIRST TEMP[0], TEMP[1]``
-Vote opcodes
-^^^^^^^^^^^^
-These opcodes compare the given value across the shader invocations
-running in the current SIMD group. The details of exactly which
-invocations get compared are implementation-defined, and it would be a
-correct implementation to only ever consider the current thread's
-value. (i.e. SIMD group of 1). The argument is treated as a boolean.
+.. opcode:: READ_INVOC - Retrieve the value from the given invocation
+ (need not be uniform)
-.. opcode:: VOTE_ANY - Value is set in any of the current invocations
+ Syntax: ``READ_INVOC dst, value, invocation``
-.. opcode:: VOTE_ALL - Value is set in all of the current invocations
+ Example: ``READ_INVOC TEMP[0].xy, TEMP[1].xy, TEMP[2].x``
-.. opcode:: VOTE_EQ - Value is the same in all of the current invocations
+ invocation.x controls the invocation number to read from for all channels.
+ The invocation number must be the same across all active invocations in a
+ sub-group; otherwise, the results are undefined.
Explanation of symbols used
------------------------------
Functions
^^^^^^^^^^^^^^
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 8c08f27..d461f78 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -398,53 +398,53 @@ struct tgsi_property_data {
#define TGSI_OPCODE_CMP 66
#define TGSI_OPCODE_SCS 67
#define TGSI_OPCODE_TXB 68
#define TGSI_OPCODE_FBFETCH 69
#define TGSI_OPCODE_DIV 70
#define TGSI_OPCODE_DP2 71
#define TGSI_OPCODE_TXL 72
#define TGSI_OPCODE_BRK 73
#define TGSI_OPCODE_IF 74
#define TGSI_OPCODE_UIF 75
- /* gap */
+#define TGSI_OPCODE_READ_INVOC 76
#define TGSI_OPCODE_ELSE 77
#define TGSI_OPCODE_ENDIF 78
#define TGSI_OPCODE_DDX_FINE 79
#define TGSI_OPCODE_DDY_FINE 80
#define TGSI_OPCODE_PUSHA 81
#define TGSI_OPCODE_POPA 82
#define TGSI_OPCODE_CEIL 83
#define TGSI_OPCODE_I2F 84
#define TGSI_OPCODE_NOT 85
#define TGSI_OPCODE_TRUNC 86
#define TGSI_OPCODE_SHL 87
- /* gap */
+#define TGSI_OPCODE_BALLOT 88
#define TGSI_OPCODE_AND 89
#define TGSI_OPCODE_OR 90
#define TGSI_OPCODE_MOD 91
#define TGSI_OPCODE_XOR 92
#define TGSI_OPCODE_SAD 93
#define TGSI_OPCODE_TXF 94
#define TGSI_OPCODE_TXQ 95
#define TGSI_OPCODE_CONT 96
#define TGSI_OPCODE_EMIT 97
#define TGSI_OPCODE_ENDPRIM 98
#define TGSI_OPCODE_BGNLOOP 99
#define TGSI_OPCODE_BGNSUB 100
#define TGSI_OPCODE_ENDLOOP 101
#define TGSI_OPCODE_ENDSUB 102
#define TGSI_OPCODE_TXQ_LZ 103 /* TXQ for mipmap level 0 */
#define TGSI_OPCODE_TXQS 104
#define TGSI_OPCODE_RESQ 105
- /* gap */
+#define TGSI_OPCODE_READ_FIRST 106
#define TGSI_OPCODE_NOP 107
#define TGSI_OPCODE_FSEQ 108
#define TGSI_OPCODE_FSGE 109
#define TGSI_OPCODE_FSLT 110
#define TGSI_OPCODE_FSNE 111
#define TGSI_OPCODE_MEMBAR 112
#define TGSI_OPCODE_CALLNZ 113
/* gap */
--
2.9.3
More information about the mesa-dev
mailing list