[Mesa-dev] [PATCH 20/21] intel/fs: Support SENDS in SHADER_OPCODE_SEND

Jason Ekstrand jason at jlekstrand.net
Sat Nov 17 02:47:48 UTC 2018


---
 src/intel/compiler/brw_fs.cpp           | 50 +++++++++++++++++++++++++
 src/intel/compiler/brw_fs.h             |  1 +
 src/intel/compiler/brw_fs_generator.cpp | 23 ++++++++----
 3 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 43d31ed2fd6..2d03d541684 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6655,11 +6655,61 @@ fs_visitor::optimize()
       OPT(lower_simd_width);
    }
 
+   OPT(fixup_sends_duplicate_payload);
+
    lower_uniform_pull_constant_loads();
 
    validate();
 }
 
+/**
+ * From the Skylake PRM Vol. 2a docs for sends:
+ *
+ *    "It is required that the second block of GRFs does not overlap with the
+ *    first block."
+ *
+ * There are plenty of cases where we may accidentally violate this due to
+ * having, for instance, both sources be the constant 0.  This little pass
+ * just adds a new vgrf for the second payload and copies it over.
+ */
+bool
+fs_visitor::fixup_sends_duplicate_payload()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
+          regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
+                          inst->src[3], inst->ex_mlen * REG_SIZE)) {
+         fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen),
+                             BRW_REGISTER_TYPE_UD);
+         /* Sadly, we've lost all notion of channels and bit sizes at this
+          * point.  Just WE_all it.
+          */
+         const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+         fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
+         fs_reg copy_dst = tmp;
+         for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
+            if (inst->ex_mlen == i + 1) {
+               /* Only one register left; do SIMD8 */
+               ibld.group(8, 0).MOV(copy_dst, copy_src);
+            } else {
+               ibld.MOV(copy_dst, copy_src);
+            }
+            copy_src = offset(copy_src, ibld, 1);
+            copy_dst = offset(copy_dst, ibld, 1);
+         }
+         inst->src[3] = tmp;
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 /**
  * Three source instruction must have a GRF/MRF destination register.
  * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index bac35ab8efb..eb3ea6d8631 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -103,6 +103,7 @@ public:
    void setup_vs_payload();
    void setup_gs_payload();
    void setup_cs_payload();
+   bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void assign_curb_setup();
    void calculate_urb_setup();
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 3a17b743e42..a410d25e5ba 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -258,11 +258,6 @@ fs_generator::generate_send(fs_inst *inst,
                             struct brw_reg payload,
                             struct brw_reg payload2)
 {
-   /* SENDS not yet supported */
-   assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
-   assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          payload2.nr == BRW_ARF_NULL);
-
    const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                             dst.nr == BRW_ARF_NULL;
    const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
@@ -270,11 +265,23 @@ fs_generator::generate_send(fs_inst *inst,
    uint32_t desc_imm = inst->desc |
       brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
 
-   brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+   uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);
+
+   if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
+      /* If we have any sort of extended descriptor, then we need SENDS.  This
+       * also covers the dual-payload case because ex_mlen goes in ex_desc.
+       */
+      brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
+                                      desc, desc_imm, ex_desc, ex_desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
+   } else {
+      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
+   }
 
    brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
-   if (inst->check_tdr)
-      brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
 }
 
 void
-- 
2.19.1



More information about the mesa-dev mailing list