Mesa (master): intel/fs: Implement representation of SWSB cross-pipeline synchronization annotations.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Apr 16 08:39:02 UTC 2021


Module: Mesa
Branch: master
Commit: 12479abded50e7caf5544776f7b7ecf161c6c487
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=12479abded50e7caf5544776f7b7ecf161c6c487

Author: Francisco Jerez <currojerez at riseup.net>
Date:   Tue Apr  6 13:03:45 2021 -0700

intel/fs: Implement representation of SWSB cross-pipeline synchronization annotations.

The execution units of XeHP platforms have multiple asynchronous ALU
pipelines, whereas the original Xe had (as far as software is
concerned) a single in-order pipeline that handled most ALU
instructions except for extended math.  It's now the compiler's
responsibility to identify cross-pipeline dependencies and insert
synchronization annotations whenever necessary.  These annotations are
encoded as some additional bits of the SWSB instruction field.

This commit represents the cross-pipeline synchronization annotations
as part of the existing tgl_swsb structure used for codegen.  The
existing tgl_swsb_*() helpers used by hand-crafted assembly are
extended to default to TGL_PIPE_ALL big-hammer synchronization in
order to ensure backwards compatibility with the existing assembly.
The following commits will extend the software scoreboard lowering
pass in order to keep track of cross-pipeline dependencies across IR
instructions, and insert more specific pipeline annotations in the
SWSB field.

The disassembler is also extended here to print out any existing
pipeline sync annotations.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>

---

 src/intel/compiler/brw_disasm.c     | 12 ++++++---
 src/intel/compiler/brw_eu_defines.h | 52 ++++++++++++++++++++++++++++++++-----
 src/intel/compiler/brw_eu_emit.c    |  2 +-
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index 330af60b2bd..99c69b2b196 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -1642,10 +1642,16 @@ qtr_ctrl(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst
 static int
 swsb(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst)
 {
-   const struct tgl_swsb swsb = tgl_swsb_decode(brw_inst_opcode(devinfo, inst),
-                                                brw_inst_swsb(devinfo, inst));
+   const enum opcode opcode = brw_inst_opcode(devinfo, inst);
+   const uint8_t x = brw_inst_swsb(devinfo, inst);
+   const struct tgl_swsb swsb = tgl_swsb_decode(devinfo, opcode, x);
    if (swsb.regdist)
-      format(file, " @%d", swsb.regdist);
+      format(file, " %s@%d",
+             (swsb.pipe == TGL_PIPE_FLOAT ? "F" :
+              swsb.pipe == TGL_PIPE_INT ? "I" :
+              swsb.pipe == TGL_PIPE_LONG ? "L" :
+              swsb.pipe == TGL_PIPE_ALL ? "A"  : "" ),
+             swsb.regdist);
    if (swsb.mode)
       format(file, " $%d%s", swsb.sbid,
              (swsb.mode & TGL_SBID_SET ? "" :
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index e9b3827599e..21c719fa136 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -35,6 +35,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include "util/macros.h"
+#include "dev/gen_device_info.h"
 
 /* The following hunk, up-to "Execution Unit" is used by both the
  * intel/compiler and i965 codebase. */
@@ -1097,12 +1098,34 @@ operator|=(tgl_sbid_mode &x, tgl_sbid_mode y)
 
 #endif
 
+/**
+ * TGL+ SWSB RegDist synchronization pipeline.
+ *
+ * On TGL all instructions that use the RegDist synchronization mechanism are
+ * considered to be executed as a single in-order pipeline, therefore only the
+ * TGL_PIPE_FLOAT pipeline is applicable.  On XeHP+ platforms there are two
+ * additional asynchronous ALU pipelines (which still execute instructions
+ * in-order and use the RegDist synchronization mechanism).  TGL_PIPE_NONE
+ * doesn't provide any RegDist pipeline synchronization information and allows
+ * the hardware to infer the pipeline based on the source types of the
+ * instruction.  TGL_PIPE_ALL can be used when synchronization with all ALU
+ * pipelines is intended.
+ */
+enum tgl_pipe {
+   TGL_PIPE_NONE = 0,
+   TGL_PIPE_FLOAT,
+   TGL_PIPE_INT,
+   TGL_PIPE_LONG,
+   TGL_PIPE_ALL
+};
+
 /**
  * Logical representation of the SWSB scheduling information of a hardware
  * instruction.  The binary representation is slightly more compact.
  */
 struct tgl_swsb {
    unsigned regdist : 3;
+   enum tgl_pipe pipe : 3;
    unsigned sbid : 4;
    enum tgl_sbid_mode mode : 3;
 };
@@ -1115,7 +1138,7 @@ struct tgl_swsb {
 static inline struct tgl_swsb
 tgl_swsb_regdist(unsigned d)
 {
-   const struct tgl_swsb swsb = { d };
+   const struct tgl_swsb swsb = { d, d ? TGL_PIPE_ALL : TGL_PIPE_NONE };
    assert(swsb.regdist == d);
    return swsb;
 }
@@ -1127,7 +1150,7 @@ tgl_swsb_regdist(unsigned d)
 static inline struct tgl_swsb
 tgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid)
 {
-   const struct tgl_swsb swsb = { 0, sbid, mode };
+   const struct tgl_swsb swsb = { 0, TGL_PIPE_NONE, sbid, mode };
    assert(swsb.sbid == sbid);
    return swsb;
 }
@@ -1151,6 +1174,7 @@ tgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist)
 {
    swsb.regdist = regdist;
    swsb.mode = swsb.mode & TGL_SBID_SET;
+   swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE);
    return swsb;
 }
 
@@ -1170,10 +1194,15 @@ tgl_swsb_src_dep(struct tgl_swsb swsb)
  * SWSB annotation.
  */
 static inline uint8_t
-tgl_swsb_encode(struct tgl_swsb swsb)
+tgl_swsb_encode(const struct gen_device_info *devinfo, struct tgl_swsb swsb)
 {
    if (!swsb.mode) {
-      return swsb.regdist;
+      const unsigned pipe = devinfo->verx10 < 125 ? 0 :
+         swsb.pipe == TGL_PIPE_FLOAT ? 0x10 :
+         swsb.pipe == TGL_PIPE_INT ? 0x18 :
+         swsb.pipe == TGL_PIPE_LONG ? 0x50 :
+         swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0;
+      return pipe | swsb.regdist;
    } else if (swsb.regdist) {
       return 0x80 | swsb.regdist << 4 | swsb.sbid;
    } else {
@@ -1187,10 +1216,12 @@ tgl_swsb_encode(struct tgl_swsb swsb)
  * tgl_swsb.
  */
 static inline struct tgl_swsb
-tgl_swsb_decode(enum opcode opcode, uint8_t x)
+tgl_swsb_decode(const struct gen_device_info *devinfo, const enum opcode opcode,
+                const uint8_t x)
 {
    if (x & 0x80) {
-      const struct tgl_swsb swsb = { (x & 0x70u) >> 4, x & 0xfu,
+      const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE,
+                                     x & 0xfu,
                                      (opcode == BRW_OPCODE_SEND ||
                                       opcode == BRW_OPCODE_SENDC ||
                                       opcode == BRW_OPCODE_MATH) ?
@@ -1203,7 +1234,14 @@ tgl_swsb_decode(enum opcode opcode, uint8_t x)
    } else if ((x & 0x70) == 0x40) {
       return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu);
    } else {
-      return tgl_swsb_regdist(x & 0x7u);
+      const struct tgl_swsb swsb = { x & 0x7u,
+                                     ((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT :
+                                      (x & 0x78) == 0x18 ? TGL_PIPE_INT :
+                                      (x & 0x78) == 0x50 ? TGL_PIPE_LONG :
+                                      (x & 0x78) == 0x8 ? TGL_PIPE_ALL :
+                                      TGL_PIPE_NONE) };
+      assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE);
+      return swsb;
    }
 }
 
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 211fa4c9633..2f53609ed9e 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -620,7 +620,7 @@ brw_inst_set_state(const struct gen_device_info *devinfo,
    brw_inst_set_access_mode(devinfo, insn, state->access_mode);
    brw_inst_set_mask_control(devinfo, insn, state->mask_control);
    if (devinfo->ver >= 12)
-      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb));
+      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
    brw_inst_set_saturate(devinfo, insn, state->saturate);
    brw_inst_set_pred_control(devinfo, insn, state->predicate);
    brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);



More information about the mesa-commit mailing list