Mesa (main): intel/compiler: Add support for LSC fence operations

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 30 16:30:45 UTC 2021


Module: Mesa
Branch: main
Commit: b67f1ff465092010d672ca55637b30a72d795e7d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b67f1ff465092010d672ca55637b30a72d795e7d

Author: Sagar Ghuge <sagar.ghuge at intel.com>
Date:   Sat Jul 11 18:33:05 2020 -0700

intel/compiler: Add support for LSC fence operations

v2 (Jason Ekstrand):
 - Squash SLM and global fence ops together

v3 (Jason Ekstrand):
 - Rework to use message descriptors instead of instruction fields

v4 (Jason Ekstrand):
 - Don't pass BTI into back-end emit function.  Always use FLAT.

Co-authored-by: Jason Ekstrand <jason at jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>

---

 src/intel/compiler/brw_eu.h               | 15 ++++++++++++
 src/intel/compiler/brw_eu_emit.c          | 31 ++++++++++++++++++++++++-
 src/intel/compiler/brw_fs_nir.cpp         | 38 +++++++++++++++++++++++++------
 src/intel/compiler/brw_ir_performance.cpp |  3 +++
 4 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 82b06a55e23..abd8f5a9f74 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -1154,6 +1154,21 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
    return GET_BITS(desc, 18, 18);
 }
 
+static inline uint32_t
+lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
+                   enum lsc_fence_scope scope,
+                   enum lsc_flush_type flush_type,
+                   bool route_to_lsc)
+{
+   assert(devinfo->has_lsc); /* devinfo is only read by this assert */
+   return SET_BITS(LSC_OP_FENCE, 5, 0) | /* opcode: fence */
+          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) | /* address size: A32 */
+          SET_BITS(scope, 11, 9) | /* caller-selected fence scope */
+          SET_BITS(flush_type, 14, 12) | /* caller-selected flush type */
+          SET_BITS(route_to_lsc, 18, 18) | /* route-to-LSC flag */
+          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29); /* surface type: always FLAT */
+}
+
 static inline uint32_t
 brw_mdc_sm2(unsigned exec_size)
 {
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index ba73c0049ef..b81cbb9c949 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3235,6 +3235,30 @@ brw_set_memory_fence_message(struct brw_codegen *p,
    brw_inst_set_binding_table_index(devinfo, insn, bti);
 }
 
+static void
+gfx12_set_memory_fence_message(struct brw_codegen *p,
+                               struct brw_inst *insn,
+                               enum brw_message_target sfid)
+{
+   const unsigned mlen = 1; /* g0 header */
+    /* One response register: its write signals completion; it carries no data. */
+   const unsigned rlen = 1;
+
+   brw_inst_set_sfid(p->devinfo, insn, sfid);
+
+   enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP; /* default for any SFID other than TGM */
+   enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE; /* default: no flush requested */
+
+   if (sfid == GFX12_SFID_TGM) { /* TGM fences get GPU scope plus an evict flush */
+      scope = LSC_FENCE_GPU;
+      flush_type = LSC_FLUSH_TYPE_EVICT;
+   }
+
+   brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
+                                            flush_type, false) | /* route_to_lsc = false */
+                         brw_message_desc(p->devinfo, mlen, rlen, false));
+}
+
 void
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
@@ -3257,7 +3281,12 @@ brw_memory_fence(struct brw_codegen *p,
    brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
    brw_set_dest(p, insn, dst);
    brw_set_src0(p, insn, src);
-   brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
+
+   /* All DG2 hardware requires LSC for fence messages, even A-step */
+   if (devinfo->has_lsc)
+      gfx12_set_memory_fence_message(p, insn, sfid);
+   else
+      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
 }
 
 void
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 64a714516ad..f0ec7dc839b 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4270,7 +4270,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_memory_barrier:
    case nir_intrinsic_begin_invocation_interlock:
    case nir_intrinsic_end_invocation_interlock: {
-      bool l3_fence, slm_fence;
+      bool l3_fence, slm_fence, tgm_fence = false;
       const enum opcode opcode =
          instr->intrinsic == nir_intrinsic_begin_invocation_interlock ?
          SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
@@ -4282,6 +4282,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                              nir_var_mem_ssbo |
                              nir_var_mem_global);
          slm_fence = modes & nir_var_mem_shared;
+
+         /* NIR currently doesn't have an image mode */
+         if (devinfo->has_lsc)
+            tgm_fence = modes & nir_var_mem_ssbo;
          break;
       }
 
@@ -4312,6 +4316,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier ||
                      instr->intrinsic == nir_intrinsic_memory_barrier ||
                      instr->intrinsic == nir_intrinsic_memory_barrier_shared;
+         tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image;
          break;
       }
 
@@ -4354,7 +4359,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          devinfo->ver >= 10; /* HSD ES # 1404612949 */
 
       unsigned fence_regs_count = 0;
-      fs_reg fence_regs[2] = {};
+      fs_reg fence_regs[3] = {};
 
       const fs_builder ubld = bld.group(8, 0);
 
@@ -4364,8 +4369,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                       ubld.vgrf(BRW_REGISTER_TYPE_UD),
                       brw_vec8_grf(0, 0),
                       brw_imm_ud(commit_enable),
-                      brw_imm_ud(/* bti */ 0));
-         fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+                      brw_imm_ud(0 /* BTI; ignored for LSC */));
+
+         fence->sfid = devinfo->has_lsc ?
+                       GFX12_SFID_UGM :
+                       GFX7_SFID_DATAPORT_DATA_CACHE;
 
          fence_regs[fence_regs_count++] = fence->dst;
 
@@ -4380,6 +4388,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
             fence_regs[fence_regs_count++] = render_fence->dst;
          }
+
+         /* Translate l3_fence into untyped and typed fence on XeHP */
+         if (devinfo->has_lsc && tgm_fence) {
+            fs_inst *fence =
+               ubld.emit(opcode,
+                         ubld.vgrf(BRW_REGISTER_TYPE_UD),
+                         brw_vec8_grf(0, 0),
+                         brw_imm_ud(commit_enable),
+                         brw_imm_ud(/* ignored */0));
+
+            fence->sfid = GFX12_SFID_TGM;
+            fence_regs[fence_regs_count++] = fence->dst;
+         }
       }
 
       if (slm_fence) {
@@ -4389,13 +4410,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                       ubld.vgrf(BRW_REGISTER_TYPE_UD),
                       brw_vec8_grf(0, 0),
                       brw_imm_ud(commit_enable),
-                      brw_imm_ud(GFX7_BTI_SLM));
-         fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+                      brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */));
+         if (devinfo->has_lsc)
+            fence->sfid = GFX12_SFID_SLM;
+         else
+            fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
 
          fence_regs[fence_regs_count++] = fence->dst;
       }
 
-      assert(fence_regs_count <= 2);
+      assert(fence_regs_count <= 3);
 
       if (stall || fence_regs_count == 0) {
          ubld.exec_all().group(1, 0).emit(
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index a9f93609561..fdffdfdaf83 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -939,6 +939,9 @@ namespace {
                abort();
 
          case GFX7_SFID_DATAPORT_DATA_CACHE:
+         case GFX12_SFID_SLM:
+         case GFX12_SFID_TGM:
+         case GFX12_SFID_UGM:
          case HSW_SFID_DATAPORT_DATA_CACHE_1:
             if (devinfo->ver >= 7)
                return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,



More information about the mesa-commit mailing list