[Mesa-dev] [PATCH 05/11] intel/compiler: Use null destination register for memory fence messages

Matt Turner mattst88 at gmail.com
Wed Mar 21 21:06:16 UTC 2018


>From Message Descriptor section in gfxspecs:

  "Memory fence messages without Commit Enable set do not return
   anything to the thread (response length is 0 and destination
   register is null)."

This fixes a GPU hang in simulation in the piglit test
arb_shader_image_load_store-shader-mem-barrier

The mem fence message doesn't send any data, and previously we were
setting the SEND's src0 to the same register as the destination. I've
kept that behavior, so src0 will now be the null register in a number of
cases, necessitating a few changes in the EU validator. The simulator
and real hardware seem to be okay with this.
---
 src/intel/compiler/brw_eu_emit.c        |  4 ++--
 src/intel/compiler/brw_eu_validate.c    | 13 +++++++++++--
 src/intel/compiler/brw_fs_nir.cpp       | 14 +++++++++++---
 src/intel/compiler/test_eu_validate.cpp |  9 +++++++++
 4 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index f039af56d05..fe7fa8723e1 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3289,8 +3289,8 @@ brw_memory_fence(struct brw_codegen *p,
 {
    const struct gen_device_info *devinfo = p->devinfo;
    const bool commit_enable =
-      devinfo->gen >= 10 || /* HSD ES # 1404612949 */
-      (devinfo->gen == 7 && !devinfo->is_haswell);
+      !(dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+        dst.nr == BRW_ARF_NULL);
    struct brw_inst *insn;
 
    brw_push_insn_state(p);
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index d3189d1ef5e..e16dfc3aaf3 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -168,6 +168,14 @@ src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *in
           brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
 }
 
+static bool
+is_mfence(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND &&
+          brw_inst_sfid(devinfo, inst) == GEN7_SFID_DATAPORT_DATA_CACHE &&
+          brw_inst_dp_msg_type(devinfo, inst) == GEN7_DATAPORT_DC_MEMORY_FENCE;
+}
+
 static unsigned
 num_sources_from_inst(const struct gen_device_info *devinfo,
                       const brw_inst *inst)
@@ -236,7 +244,7 @@ sources_not_null(const struct gen_device_info *devinfo,
    if (num_sources == 3)
       return (struct string){};
 
-   if (num_sources >= 1)
+   if (num_sources >= 1 && !is_mfence(devinfo, inst))
       ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
 
    if (num_sources == 2)
@@ -256,7 +264,8 @@ send_restrictions(const struct gen_device_info *devinfo,
                "send must use direct addressing");
 
       if (devinfo->gen >= 7) {
-         ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
+         ERROR_IF(!src0_is_grf(devinfo, inst) && !is_mfence(devinfo, inst),
+                  "send from non-GRF");
          ERROR_IF(brw_inst_eot(devinfo, inst) &&
                   brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                   "send with EOT must use g112-g127");
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index dbd2105f7e9..063f0256829 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3859,9 +3859,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_memory_barrier_image:
    case nir_intrinsic_memory_barrier: {
       const fs_builder ubld = bld.group(8, 0);
-      const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
-      ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
-         ->size_written = 2 * REG_SIZE;
+      if (devinfo->gen == 7 && !devinfo->is_haswell) {
+         const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+            ->size_written = 2 * REG_SIZE;
+      } else {
+         const fs_reg tmp =
+            /* HSD ES #1404612949 */
+            devinfo->gen >= 10 ? ubld.vgrf(BRW_REGISTER_TYPE_UD)
+                               : bld.null_reg_d();
+         ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp);
+      }
       break;
    }
 
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 161db994b2b..8169f951b2d 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -168,6 +168,15 @@ TEST_P(validation_test, math_src1_null_reg)
    }
 }
 
+TEST_P(validation_test, mfence_src0_null_reg)
+{
+   /* On HSW+ mfence's src0 is the null register */
+   if (devinfo.gen >= 8 || devinfo.is_haswell) {
+      brw_memory_fence(p, null);
+      EXPECT_TRUE(validate(p));
+   }
+}
+
 TEST_P(validation_test, opcode46)
 {
    /* opcode 46 is "push" on Gen 4 and 5
-- 
2.16.1



More information about the mesa-dev mailing list