Mesa (main): ir3: Add gen4 new subgroup instructions

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Dec 7 21:17:58 UTC 2021


Module: Mesa
Branch: main
Commit: d1c49901dfe068323ffd8a3f5a98dc0505b3632a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d1c49901dfe068323ffd8a3f5a98dc0505b3632a

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Mon Nov 15 16:57:38 2021 +0200

ir3: Add gen4 new subgroup instructions

* getlast.w8 #4 - Perform a jump for all fibers in the first cluster
   with any active fibers, except for the last fiber in that cluster
* brcst.active.w8 - necessary to implement arithmetic subgroup
   operations with prefix sum.
* quad_shuffle.brcst - subgroupQuadBroadcast
* quad_shuffle.horiz - subgroupQuadSwapHorizontal
* quad_shuffle.vert - subgroupQuadSwapVertical
* quad_shuffle.diag - subgroupQuadSwapDiagonal
* getfiberid - gl_SubgroupInvocationID

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13817>

---

 src/freedreno/ir3/disasm-a3xx.c  |   7 ++
 src/freedreno/ir3/instr-a3xx.h   |  11 +-
 src/freedreno/ir3/ir3.h          |   2 +
 src/freedreno/ir3/ir3_lexer.l    |  19 ++--
 src/freedreno/ir3/ir3_parser.y   |  26 ++++-
 src/freedreno/ir3/tests/disasm.c |  11 ++
 src/freedreno/isa/encode.c       |   1 +
 src/freedreno/isa/ir3-cat0.xml   |  32 ++++++
 src/freedreno/isa/ir3-cat5.xml   | 222 +++++++++++++++++++++++++++++----------
 src/freedreno/isa/ir3-cat6.xml   |  23 ++++
 10 files changed, 281 insertions(+), 73 deletions(-)

diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 24b3805085c..362db68b763 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -173,6 +173,7 @@ static const struct opc_info {
    OPC(0, OPC_STKR,         stkr),
    OPC(0, OPC_XSET,         xset),
    OPC(0, OPC_XCLR,         xclr),
+   OPC(0, OPC_GETLAST,      getlast),
    OPC(0, OPC_GETONE,       getone),
    OPC(0, OPC_DBG,          dbg),
    OPC(0, OPC_SHPS,         shps),
@@ -300,6 +301,11 @@ static const struct opc_info {
    OPC(5, OPC_DSYPP_1,      dsypp.1),
    OPC(5, OPC_RGETPOS,      rgetpos),
    OPC(5, OPC_RGETINFO,     rgetinfo),
+   OPC(5, OPC_BRCST_ACTIVE, brcst.active),
+   OPC(5, OPC_QUAD_SHUFFLE_BRCST, quad_shuffle.brcst),
+   OPC(5, OPC_QUAD_SHUFFLE_HORIZ, quad_shuffle.horiz),
+   OPC(5, OPC_QUAD_SHUFFLE_VERT,  quad_shuffle.vert),
+   OPC(5, OPC_QUAD_SHUFFLE_DIAG,  quad_shuffle.diag),
    /* macros are needed here for ir3_print */
    OPC(5, OPC_DSXPP_MACRO,  dsxpp.macro),
    OPC(5, OPC_DSYPP_MACRO,  dsypp.macro),
@@ -377,6 +383,7 @@ static const struct opc_info {
    OPC(6, OPC_ENDLS,        endls),
    OPC(6, OPC_GETSPID,      getspid),
    OPC(6, OPC_GETWID,       getwid),
+   OPC(6, OPC_GETFIBERID,   getfiberid),
 
    OPC(6, OPC_SPILL_MACRO,  spill.macro),
    OPC(6, OPC_RELOAD_MACRO, reload.macro),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 8a85f575ddb..cffe78b1442 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -80,6 +80,7 @@ typedef enum {
    OPC_DBG             = _OPC(0, 22),
    OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
    OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
+   OPC_GETLAST         = _OPC(0, 25),
 
    OPC_PREDT           = _OPC(0, 29),   /* predicated true */
    OPC_PREDF           = _OPC(0, 30),   /* predicated false */
@@ -245,9 +246,14 @@ typedef enum {
    OPC_DSYPP_1         = _OPC(5, 25),
    OPC_RGETPOS         = _OPC(5, 26),
    OPC_RGETINFO        = _OPC(5, 27),
+   OPC_BRCST_ACTIVE    = _OPC(5, 28),
+   OPC_QUAD_SHUFFLE_BRCST  = _OPC(5, 29),
+   OPC_QUAD_SHUFFLE_HORIZ  = _OPC(5, 30),
+   OPC_QUAD_SHUFFLE_VERT   = _OPC(5, 31),
+   OPC_QUAD_SHUFFLE_DIAG   = _OPC(5, 32),
    /* cat5 meta instructions, placed above the cat5 opc field's size */
-   OPC_DSXPP_MACRO     = _OPC(5, 32),
-   OPC_DSYPP_MACRO     = _OPC(5, 33),
+   OPC_DSXPP_MACRO     = _OPC(5, 35),
+   OPC_DSYPP_MACRO     = _OPC(5, 36),
 
    /* category 6: */
    OPC_LDG             = _OPC(6, 0),        /* load-global */
@@ -286,6 +292,7 @@ typedef enum {
    OPC_ENDLS           = _OPC(6, 35), /* ??? */
    OPC_GETSPID         = _OPC(6, 36), /* SP ID */
    OPC_GETWID          = _OPC(6, 37), /* wavefront ID */
+   OPC_GETFIBERID      = _OPC(6, 38), /* fiber ID */
 
    /* Logical opcodes for things that differ in a6xx+ */
    OPC_STC             = _OPC(6, 40),
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 20b2298b7b1..4e081977295 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -330,6 +330,7 @@ struct ir3_instruction {
       struct {
          unsigned samp, tex;
          unsigned tex_base : 3;
+         unsigned cluster_size : 4;
          type_t type;
       } cat5;
       struct {
@@ -2171,6 +2172,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
 }
 
 /* cat6 instructions: */
+INSTR0(GETFIBERID)
 INSTR2(LDLV)
 INSTR3(LDG)
 INSTR3(LDL)
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index 2353a049eb9..1a496e2a791 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -72,16 +72,6 @@ static int parse_reg(const char *str)
 	return num;
 }
 
-static int parse_w(const char *str)
-{
-	str++;
-	unsigned num = strtol(str, NULL, 10);
-	if ((num % 32) != 0)
-		yy_fatal_error("w# must be multiple of 32");
-	if (num < 32)
-		yy_fatal_error("w# must be at least 32");
-	return num / 32;
-}
 %}
 
 %option noyywrap
@@ -139,7 +129,7 @@ static int parse_w(const char *str)
 "a0.x"                            return T_A0;
 "a1.x"                            return T_A1;
 "p0."[xyzw]                       ir3_yylval.num = parse_reg(yytext); return T_P0;
-"w"[0-9]+                         ir3_yylval.num = parse_w(yytext);   return T_W;
+"w"[0-9]+                         ir3_yylval.num = strtol(yytext+1, NULL, 10); return T_W;
 "s#"[0-9]+                        ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_SAMP;
 "t#"[0-9]+                        ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_TEX;
 
@@ -167,6 +157,7 @@ static int parse_w(const char *str)
 "stkr"                            return TOKEN(T_OP_STKR);
 "xset"                            return TOKEN(T_OP_XSET);
 "xclr"                            return TOKEN(T_OP_XCLR);
+"getlast"                         return TOKEN(T_OP_GETLAST);
 "getone"                          return TOKEN(T_OP_GETONE);
 "dbg"                             return TOKEN(T_OP_DBG);
 "shps"                            return TOKEN(T_OP_SHPS);
@@ -296,6 +287,11 @@ static int parse_w(const char *str)
 "dsypp.1"                         return TOKEN(T_OP_DSYPP_1);
 "rgetpos"                         return TOKEN(T_OP_RGETPOS);
 "rgetinfo"                        return TOKEN(T_OP_RGETINFO);
+"brcst.active"                    return TOKEN(T_OP_BRCST_A);
+"quad_shuffle.brcst"              return TOKEN(T_OP_QSHUFFLE_BRCST);
+"quad_shuffle.horiz"              return TOKEN(T_OP_QSHUFFLE_H);
+"quad_shuffle.vert"               return TOKEN(T_OP_QSHUFFLE_V);
+"quad_shuffle.diag"               return TOKEN(T_OP_QSHUFFLE_DIAG);
 
                                   /* category 6: */
 "ldg"                             return TOKEN(T_OP_LDG);
@@ -369,6 +365,7 @@ static int parse_w(const char *str)
 "ldlv"                            return TOKEN(T_OP_LDLV);
 "getspid"                         return TOKEN(T_OP_GETSPID);
 "getwid"                          return TOKEN(T_OP_GETWID);
+"getfiberid"                      return TOKEN(T_OP_GETFIBERID);
 
                                   /* category 7: */
 "bar"                             return TOKEN(T_OP_BAR);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 674e2fe2003..adb9c9e334b 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -399,6 +399,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
 %token <tok> T_OP_STKR
 %token <tok> T_OP_XSET
 %token <tok> T_OP_XCLR
+%token <tok> T_OP_GETLAST
 %token <tok> T_OP_GETONE
 %token <tok> T_OP_DBG
 %token <tok> T_OP_SHPS
@@ -526,6 +527,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
 %token <tok> T_OP_DSYPP_1
 %token <tok> T_OP_RGETPOS
 %token <tok> T_OP_RGETINFO
+%token <tok> T_OP_BRCST_A
+%token <tok> T_OP_QSHUFFLE_BRCST
+%token <tok> T_OP_QSHUFFLE_H
+%token <tok> T_OP_QSHUFFLE_V
+%token <tok> T_OP_QSHUFFLE_DIAG
 
 /* category 6: */
 %token <tok> T_OP_LDG
@@ -598,6 +604,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
 %token <tok> T_OP_LDLV
 %token <tok> T_OP_GETSPID
 %token <tok> T_OP_GETWID
+%token <tok> T_OP_GETFIBERID
 
 /* category 7: */
 %token <tok> T_OP_BAR
@@ -822,6 +829,7 @@ cat0_instr:        T_OP_NOP        { new_instr(OPC_NOP); }
 |                  T_OP_PREDT      { new_instr(OPC_PREDT); }    cat0_src1
 |                  T_OP_PREDF      { new_instr(OPC_PREDF); }    cat0_src1
 |                  T_OP_PREDE      { new_instr(OPC_PREDE); }
+|                  T_OP_GETLAST '.' T_W { new_instr(OPC_GETLAST); }   cat0_immed
 
 cat1_opc:          T_OP_MOV '.' T_CAT1_TYPE_TYPE {
                        parse_type_type(new_instr(OPC_MOV), $3);
@@ -837,9 +845,16 @@ cat1_movmsk:       T_OP_MOVMSK '.' T_W {
                        new_instr(OPC_MOVMSK);
                        instr->cat1.src_type = TYPE_U32;
                        instr->cat1.dst_type = TYPE_U32;
-                       instr->repeat = $3 - 1;
                    } dst_reg {
-                       instr->dsts[0]->wrmask = (1 << $3) - 1;
+                       if (($3 % 32) != 0)
+                          yyerror("w# must be multiple of 32");
+                       if ($3 < 32)
+                          yyerror("w# must be at least 32");
+
+                       int num = $3 / 32;
+
+                       instr->repeat = num - 1;
+                       instr->dsts[0]->wrmask = (1 << num) - 1;
                    }
 
 cat1_mova1:        T_OP_MOVA1 T_A1 ',' {
@@ -995,6 +1010,11 @@ cat5_opc:          T_OP_ISAM      { new_instr(OPC_ISAM); }
 |                  T_OP_SAMGP3    { new_instr(OPC_SAMGP3); }
 |                  T_OP_RGETPOS   { new_instr(OPC_RGETPOS); }
 |                  T_OP_RGETINFO  { new_instr(OPC_RGETINFO); }
+|                  T_OP_BRCST_A   { new_instr(OPC_BRCST_ACTIVE); }
+|                  T_OP_QSHUFFLE_BRCST { new_instr(OPC_QUAD_SHUFFLE_BRCST); }
+|                  T_OP_QSHUFFLE_H     { new_instr(OPC_QUAD_SHUFFLE_HORIZ); }
+|                  T_OP_QSHUFFLE_V     { new_instr(OPC_QUAD_SHUFFLE_VERT); }
+|                  T_OP_QSHUFFLE_DIAG  { new_instr(OPC_QUAD_SHUFFLE_DIAG); }
 
 cat5_flag:         '.' T_3D       { instr->flags |= IR3_INSTR_3D; }
 |                  '.' 'a'        { instr->flags |= IR3_INSTR_A; }
@@ -1005,6 +1025,7 @@ cat5_flag:         '.' T_3D       { instr->flags |= IR3_INSTR_3D; }
 |                  '.' T_UNIFORM  { }
 |                  '.' T_NONUNIFORM  { instr->flags |= IR3_INSTR_NONUNIF; }
 |                  '.' T_BASE     { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
+|                  '.' T_W        { instr->cat5.cluster_size = $2; }
 cat5_flags:
 |                  cat5_flag cat5_flags
 
@@ -1136,6 +1157,7 @@ cat6_ibo:          cat6_ibo_opc_1src cat6_type cat6_dim dst_reg ',' 'g' '[' cat6
 cat6_id_opc:
                    T_OP_GETSPID { new_instr(OPC_GETSPID); }
 |                  T_OP_GETWID  { new_instr(OPC_GETWID); }
+|                  T_OP_GETFIBERID { new_instr(OPC_GETFIBERID); }
 
 cat6_id:           cat6_id_opc cat6_type dst_reg
 
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 679c843bb3f..325cd70d4fa 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -69,6 +69,7 @@ static const struct test {
    INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
    INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
    INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
+   INSTR_6XX(02220000_00000004, "getlast.w8 #4"),
    INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
    INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
    INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
@@ -157,6 +158,13 @@ static const struct test {
    INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"),
 
 
+   /* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */
+   INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */
+   /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
+   INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */
+   /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */
+   INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */
+
    /* cat6 */
 
    INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
@@ -374,6 +382,9 @@ static const struct test {
    /* dEQP-VK.descriptor_indexing.sampler */
    INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
 
+   /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
+   INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
+
    /* Custom test since we've never seen the blob emit these. */
    INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
    INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index 1c638cc171d..4e61d685583 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -22,6 +22,7 @@
  */
 
 #include "util/log.h"
+#include "util/u_math.h"
 
 #include "ir3/ir3.h"
 #include "ir3/ir3_shader.h"
diff --git a/src/freedreno/isa/ir3-cat0.xml b/src/freedreno/isa/ir3-cat0.xml
index bb6074011f4..5c0c0caa205 100644
--- a/src/freedreno/isa/ir3-cat0.xml
+++ b/src/freedreno/isa/ir3-cat0.xml
@@ -171,6 +171,38 @@ SOFTWARE.
 	<pattern low="55" high="58">0000</pattern> <!-- OPC -->
 </bitset>
 
+<bitset name="getlast" extends="#instruction-cat0">
+	<doc>
+		Perform a jump for all fibers in the first cluster with any active
+		fibers, except for the last fiber in the cluster.
+		Although there is a separate field for CLUSTER_SIZE, its value
+		does not change the behaviour in any observable way; the
+		instruction behaves as if CLUSTER_SIZE is always 8.
+	</doc>
+
+	<gen min="600"/>
+
+	<display>
+		{SY}{SS}{JP}{NAME}.w{CLUSTER_SIZE} #{IMMED}
+	</display>
+	<derived name="CLUSTER_SIZE" type="uint">
+		<expr>
+			2 << {W}
+		</expr>
+	</derived>
+	<pattern low="32" high="36">xxxxx</pattern> <!-- INDEX -->
+	<pattern low="37" high="39">xxx</pattern>  <!-- BRTYPE -->
+	<pattern low="45" high="47">xxx</pattern>  <!-- src1 -->
+	<pattern low="49" high="51">xx1</pattern>  <!-- OPC_HI -->
+	<pattern low="55" high="58">0100</pattern> <!-- OPC -->
+
+	<field name="W" low="52" high="54" type="uint"/>
+
+	<encode>
+		<map name="W">util_logbase2(8) - 1</map>
+	</encode>
+</bitset>
+
 <bitset name="getone" extends="#instruction-cat0-immed">
 	<pattern low="49" high="51">xx1</pattern>  <!-- OPC_HI -->
 	<pattern low="55" high="58">0101</pattern> <!-- OPC -->
diff --git a/src/freedreno/isa/ir3-cat5.xml b/src/freedreno/isa/ir3-cat5.xml
index dc2ee6ac44e..73e1a985db2 100644
--- a/src/freedreno/isa/ir3-cat5.xml
+++ b/src/freedreno/isa/ir3-cat5.xml
@@ -52,33 +52,6 @@ SOFTWARE.
 </bitset>
 
 <bitset name="#instruction-cat5" extends="#instruction">
-	<override>
-		<expr>{S2EN_BINDLESS}</expr>
-		<doc>
-			The s2en (indirect) or bindless case
-		</doc>
-		<display>
-			{SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{NONUNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
-		</display>
-		<field name="BASE_HI" low="19" high="20" type="uint"/>
-		<field name="SRC3" low="21" high="28" type="#cat5-src3">
-			<param name="BINDLESS"/>
-			<param name="DESC_MODE"/>
-			<param name="HAS_SAMP"/>
-			<param name="HAS_TEX"/>
-		</field>
-		<field name="DESC_MODE" low="29" high="31" type="#cat5-s2en-bindless-desc-mode"/>
-		<field name="BASE" pos="47" type="#cat5-s2en-bindless-base">
-			<param name="BINDLESS"/>
-			<param name="BASE_HI"/>
-		</field>
-		<derived name="BINDLESS" expr="#cat5-s2enb-is-bindless" type="bool"/>
-		<derived name="S2EN" expr="#cat5-s2enb-is-indirect" type="bool" display=".s2en"/>
-		<derived name="UNIFORM" expr="#cat5-s2enb-is-uniform" type="bool" display=".uniform"/>
-		<derived name="NONUNIFORM" expr="#cat5-s2enb-is-nonuniform" type="bool" display=".nonuniform"/>
-		<derived name="A1" expr="#cat5-s2enb-uses_a1" type="bool" display=", a1.x"/>
-	</override>
-
 	<doc>
 		The "normal" case, ie. not s2en (indirect) and/or bindless
 	</doc>
@@ -105,7 +78,7 @@ SOFTWARE.
 		to figure out what this bit does
 	 -->
 	<pattern low="17" high="18">0x</pattern>
-	<assert low="19" high="20">00</assert>   <!-- BASE_HI -->
+
 	<field name="SAMP" low="21" high="24" type="#cat5-samp">
 		<param name="HAS_SAMP"/>
 	</field>
@@ -124,7 +97,6 @@ SOFTWARE.
 	<field name="S" pos="50" type="bool" display=".s"/>
 	<field name="S2EN_BINDLESS" pos="51" type="bool"/>
 	<field name="O" pos="52" type="bool" display=".o"/>
-	<field name="P" pos="53" type="bool" display=".p"/>
 	<!-- OPC -->
 	<field name="JP" pos="59" type="bool" display="(jp)"/>
 	<field name="SY" pos="60" type="bool" display="(sy)"/>
@@ -142,7 +114,6 @@ SOFTWARE.
 		<map name="S">!!(src->flags & IR3_INSTR_S)</map>
 		<map name="S2EN_BINDLESS">!!(src->flags & (IR3_INSTR_S2EN | IR3_INSTR_B))</map>
 		<map name="O">!!(src->flags & IR3_INSTR_O)</map>
-		<map name="P">!!(src->flags & IR3_INSTR_P)</map>
 		<map name="DESC_MODE">extract_cat5_DESC_MODE(src)</map>
 		<!--
 			TODO the src order is currently a bit messy due to ir3 using srcs[0]
@@ -154,7 +125,43 @@ SOFTWARE.
 	</encode>
 </bitset>
 
-<bitset name="isam" extends="#instruction-cat5">
+<bitset name="#instruction-cat5-tex" extends="#instruction-cat5">
+	<override>
+		<expr>{S2EN_BINDLESS}</expr>
+		<doc>
+			The s2en (indirect) or bindless case
+		</doc>
+		<display>
+			{SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{NONUNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
+		</display>
+		<field name="BASE_HI" low="19" high="20" type="uint"/>
+		<field name="SRC3" low="21" high="28" type="#cat5-src3">
+			<param name="BINDLESS"/>
+			<param name="DESC_MODE"/>
+			<param name="HAS_SAMP"/>
+			<param name="HAS_TEX"/>
+		</field>
+		<field name="DESC_MODE" low="29" high="31" type="#cat5-s2en-bindless-desc-mode"/>
+		<field name="BASE" pos="47" type="#cat5-s2en-bindless-base">
+			<param name="BINDLESS"/>
+			<param name="BASE_HI"/>
+		</field>
+		<derived name="BINDLESS" expr="#cat5-s2enb-is-bindless" type="bool"/>
+		<derived name="S2EN" expr="#cat5-s2enb-is-indirect" type="bool" display=".s2en"/>
+		<derived name="UNIFORM" expr="#cat5-s2enb-is-uniform" type="bool" display=".uniform"/>
+		<derived name="NONUNIFORM" expr="#cat5-s2enb-is-nonuniform" type="bool" display=".nonuniform"/>
+		<derived name="A1" expr="#cat5-s2enb-uses_a1" type="bool" display=", a1.x"/>
+	</override>
+
+	<assert low="19" high="20">00</assert>   <!-- BASE_HI -->
+	<field name="P" pos="53" type="bool" display=".p"/>
+
+	<encode>
+		<map name="P">!!(src->flags & IR3_INSTR_P)</map>
+	</encode>
+</bitset>
+
+<bitset name="isam" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00000</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -162,7 +169,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="isaml" extends="#instruction-cat5">
+<bitset name="isaml" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00001</pattern>
 	<derived name="NUM_SRC" expr="#two" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -170,7 +177,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="isamm" extends="#instruction-cat5">
+<bitset name="isamm" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00010</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -178,7 +185,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="sam" extends="#instruction-cat5">
+<bitset name="sam" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00011</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -186,7 +193,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samb" extends="#instruction-cat5">
+<bitset name="samb" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00100</pattern>
 	<derived name="NUM_SRC" expr="#two" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -194,7 +201,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="saml" extends="#instruction-cat5">
+<bitset name="saml" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00101</pattern>
 	<derived name="NUM_SRC" expr="#two" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -202,7 +209,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samgq" extends="#instruction-cat5">
+<bitset name="samgq" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00110</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -210,7 +217,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="getlod" extends="#instruction-cat5">
+<bitset name="getlod" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">00111</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -218,7 +225,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="conv" extends="#instruction-cat5">
+<bitset name="conv" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01000</pattern>
 	<derived name="NUM_SRC" expr="#two" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -226,7 +233,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="convm" extends="#instruction-cat5">
+<bitset name="convm" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01001</pattern>
 	<derived name="NUM_SRC" expr="#two" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -234,7 +241,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="getsize" extends="#instruction-cat5">
+<bitset name="getsize" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01010</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -242,7 +249,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="getbuf" extends="#instruction-cat5">
+<bitset name="getbuf" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01011</pattern>
 	<derived name="NUM_SRC" expr="#zero" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -250,7 +257,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="getpos" extends="#instruction-cat5">
+<bitset name="getpos" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01100</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -258,7 +265,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="getinfo" extends="#instruction-cat5">
+<bitset name="getinfo" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01101</pattern>
 	<derived name="NUM_SRC" expr="#zero" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -266,7 +273,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="dsx" extends="#instruction-cat5">
+<bitset name="dsx" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01110</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -274,7 +281,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="dsy" extends="#instruction-cat5">
+<bitset name="dsy" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">01111</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -282,7 +289,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="gather4r" extends="#instruction-cat5">
+<bitset name="gather4r" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10000</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -290,7 +297,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="gather4g" extends="#instruction-cat5">
+<bitset name="gather4g" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10001</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -298,7 +305,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="gather4b" extends="#instruction-cat5">
+<bitset name="gather4b" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10010</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -306,7 +313,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="gather4a" extends="#instruction-cat5">
+<bitset name="gather4a" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10011</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -314,7 +321,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samgp0" extends="#instruction-cat5">
+<bitset name="samgp0" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10100</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -322,7 +329,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samgp1" extends="#instruction-cat5">
+<bitset name="samgp1" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10101</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -330,7 +337,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samgp2" extends="#instruction-cat5">
+<bitset name="samgp2" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10110</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -338,7 +345,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="samgp3" extends="#instruction-cat5">
+<bitset name="samgp3" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">10111</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#true" type="bool"/>
@@ -346,7 +353,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="dsxpp.1" extends="#instruction-cat5">
+<bitset name="dsxpp.1" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">11000</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -354,7 +361,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#false" type="bool"/>
 </bitset>
 
-<bitset name="dsypp.1" extends="#instruction-cat5">
+<bitset name="dsypp.1" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">11001</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -362,7 +369,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#false" type="bool"/>
 </bitset>
 
-<bitset name="rgetpos" extends="#instruction-cat5">
+<bitset name="rgetpos" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">11010</pattern>
 	<derived name="NUM_SRC" expr="#one" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -370,7 +377,7 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
-<bitset name="rgetinfo" extends="#instruction-cat5">
+<bitset name="rgetinfo" extends="#instruction-cat5-tex">
 	<pattern low="54" high="58">11011</pattern>
 	<derived name="NUM_SRC" expr="#zero" type="uint"/>
 	<derived name="HAS_SAMP" expr="#false" type="bool"/>
@@ -378,6 +385,105 @@ SOFTWARE.
 	<derived name="HAS_TYPE" expr="#true" type="bool"/>
 </bitset>
 
+<bitset name="brcst.active" extends="#instruction-cat5">
+	<doc>
+		The subgroup is divided into (subgroup_size / CLUSTER_SIZE)
+		clusters. For each cluster brcst.active.w does:
+
+		Given a cluster of fibers f_0, f_1, ..., f_{CLUSTER_SIZE-1} brcst
+		broadcasts the SRC value from the fiber f_{CLUSTER_SIZE/2-1}
+		to fibers f_{CLUSTER_SIZE/2}, ..., f_{CLUSTER_SIZE-1}. The DST reg
+		in other fibers is unaffected. If fiber f_{CLUSTER_SIZE/2-1} is
+		inactive the value to broadcast is taken from lower fibers
+		f_{CLUSTER_SIZE/2-2}, f_{CLUSTER_SIZE/2-3}, ...
+		If all fibers f_0, f_1, ..., f_{CLUSTER_SIZE/2-1} are inactive
+		the DST reg remains unchanged for all fibers.
+
+		It is necessary in order to implement arithmetic subgroup
+		operations with prefix sum (https://en.wikipedia.org/wiki/Prefix_sum).
+
+		For brcst.active.w8 without inactive fibers:
+			Fiber      | 0  1  2  3  4  5  6  7  | 8  9  10  11  12  13  14  15
+			SRC        | s0 s1 s2 s3 ...      s7 | s8  ...   s11 ...         s15
+			DST_before | d0 d1       ...      d7 | d8  ...                   d15
+			DST_after  | d0 d1 d2 d3 s3 s3 s3 s3 | d8  ...   d11 s11 s11 s11 s11
+
+		If fibers 2 and 3 are inactive:
+			Fiber      | 0  1  X  X  4  5  6  7  | ...
+			SRC        | s0 s1 X  X  ...      s7 | ...
+			DST_before | d0 d1       ...      d7 | ...
+			DST_after  | d0 d1 X  X  s1 s1 s1 s1 | ...
+	</doc>
+
+	<gen min="600"/>
+
+	<display>
+		{SY}{JP}{NAME}.w{CLUSTER_SIZE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}
+	</display>
+
+	<field name="W" low="19" high="20" type="uint"/>
+	<pattern low="53" high="58">111110</pattern> <!-- OPC -->
+
+	<derived name="CLUSTER_SIZE" type="uint">
+		<expr>
+			2 << {W}
+		</expr>
+	</derived>
+	<derived name="NUM_SRC" expr="#one" type="uint"/>
+	<derived name="HAS_SAMP" expr="#false" type="bool"/>
+	<derived name="HAS_TEX" expr="#false" type="bool"/>
+	<derived name="HAS_TYPE" expr="#true" type="bool"/>
+
+	<encode>
+		<map name="W">util_logbase2(src->cat5.cluster_size) - 1</map>
+	</encode>
+</bitset>
+
+<bitset name="#instruction-cat5-quad-shuffle" extends="#instruction-cat5">
+	<gen min="600"/>
+
+	<display>
+		{SY}{JP}{NAME} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}
+	</display>
+
+	<pattern low="53" high="58">111111</pattern> <!-- OPC -->
+
+	<derived name="HAS_SAMP" expr="#false" type="bool"/>
+	<derived name="HAS_TEX" expr="#false" type="bool"/>
+	<derived name="HAS_TYPE" expr="#true" type="bool"/>
+</bitset>
+
+<bitset name="quad_shuffle.brcst" extends="#instruction-cat5-quad-shuffle">
+	<doc>subgroupQuadBroadcast</doc>
+
+	<pattern low="19" high="20">00</pattern>   <!-- Quad-shuffle variant -->
+
+	<derived name="NUM_SRC" expr="#two" type="uint"/>
+</bitset>
+
+<bitset name="quad_shuffle.horiz" extends="#instruction-cat5-quad-shuffle">
+	<doc>subgroupQuadSwapHorizontal</doc>
+
+	<pattern low="19" high="20">01</pattern>   <!-- Quad-shuffle variant -->
+
+	<derived name="NUM_SRC" expr="#one" type="uint"/>
+</bitset>
+
+<bitset name="quad_shuffle.vert" extends="#instruction-cat5-quad-shuffle">
+	<doc>subgroupQuadSwapVertical</doc>
+
+	<pattern low="19" high="20">10</pattern>   <!-- Quad-shuffle variant -->
+
+	<derived name="NUM_SRC" expr="#one" type="uint"/>
+</bitset>
+
+<bitset name="quad_shuffle.diag" extends="#instruction-cat5-quad-shuffle">
+	<doc>subgroupQuadSwapDiagonal</doc>
+
+	<pattern low="19" high="20">11</pattern>   <!-- Quad-shuffle variant -->
+
+	<derived name="NUM_SRC" expr="#one" type="uint"/>
+</bitset>
 
 <!--
 	All the magic for conditionally displaying various srcs, etc
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 220ac9f0401..bf3b4839214 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -827,6 +827,29 @@ SOFTWARE.
 	<pattern low="52" high="53">1x</pattern>
 </bitset>
 
+<bitset name="getfiberid" extends="#instruction-cat6-a6xx">
+	<doc>
+		GET Fiber ID (gl_SubgroupInvocationID)
+	</doc>
+
+	<gen min="600"/>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPE} {DST}
+	</display>
+
+	<pattern pos="0"           >0</pattern>
+	<pattern low="9"  high="10">xx</pattern>   <!-- D_MINUS_ONE -->
+	<pattern pos="11"          >x</pattern>    <!-- TYPED -->
+	<pattern low="14" high="19">100110</pattern>   <!-- OPC -->
+	<pattern low="20" high="23">11xx</pattern>
+	<pattern low="24" high="31">xxxxxxxx</pattern>    <!-- SRC2 -->
+	<field   low="32" high="39" name="DST" type="#reg-gpr"/>
+	<pattern low="41" high="48">xxxxxxxx</pattern>  <!-- SSBO/image binding point -->
+	<field   low="49" high="51" name="TYPE" type="#type"/>
+	<pattern low="52" high="53">1x</pattern>
+</bitset>
+
 <bitset name="resinfo.b" extends="#instruction-cat6-a6xx">
 	<doc>
 		RESourceINFO - returns image/ssbo dimensions (3 components)



More information about the mesa-commit mailing list