Mesa (main): intel/compiler: Add support for LSC fence operations
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 30 16:30:45 UTC 2021
Module: Mesa
Branch: main
Commit: b67f1ff465092010d672ca55637b30a72d795e7d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b67f1ff465092010d672ca55637b30a72d795e7d
Author: Sagar Ghuge <sagar.ghuge at intel.com>
Date: Sat Jul 11 18:33:05 2020 -0700
intel/compiler: Add support for LSC fence operations
v2 (Jason Ekstrand):
- Squash SLM and global fence ops together
v3 (Jason Ekstrand):
- Rework to use message descriptors instead of instruction fields
v4 (Jason Ekstrand):
- Don't pass BTI into back-end emit function. Always use FLAT.
Co-authored-by: Jason Ekstrand <jason at jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>
---
src/intel/compiler/brw_eu.h | 15 ++++++++++++
src/intel/compiler/brw_eu_emit.c | 31 ++++++++++++++++++++++++-
src/intel/compiler/brw_fs_nir.cpp | 38 +++++++++++++++++++++++++------
src/intel/compiler/brw_ir_performance.cpp | 3 +++
4 files changed, 79 insertions(+), 8 deletions(-)
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 82b06a55e23..abd8f5a9f74 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -1154,6 +1154,21 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
return GET_BITS(desc, 18, 18);
}
+static inline uint32_t
+lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
+ enum lsc_fence_scope scope,
+ enum lsc_flush_type flush_type,
+ bool route_to_lsc)
+{
+ assert(devinfo->has_lsc);
+ return SET_BITS(LSC_OP_FENCE, 5, 0) |
+ SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
+ SET_BITS(scope, 11, 9) |
+ SET_BITS(flush_type, 14, 12) |
+ SET_BITS(route_to_lsc, 18, 18) |
+ SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
+}
+
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index ba73c0049ef..b81cbb9c949 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3235,6 +3235,30 @@ brw_set_memory_fence_message(struct brw_codegen *p,
brw_inst_set_binding_table_index(devinfo, insn, bti);
}
+static void
+gfx12_set_memory_fence_message(struct brw_codegen *p,
+ struct brw_inst *insn,
+ enum brw_message_target sfid)
+{
+ const unsigned mlen = 1; /* g0 header */
+ /* Completion signaled by write to register. No data returned. */
+ const unsigned rlen = 1;
+
+ brw_inst_set_sfid(p->devinfo, insn, sfid);
+
+ enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP;
+ enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE;
+
+ if (sfid == GFX12_SFID_TGM) {
+ scope = LSC_FENCE_GPU;
+ flush_type = LSC_FLUSH_TYPE_EVICT;
+ }
+
+ brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
+ flush_type, false) |
+ brw_message_desc(p->devinfo, mlen, rlen, false));
+}
+
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
@@ -3257,7 +3281,12 @@ brw_memory_fence(struct brw_codegen *p,
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
+
+ /* All DG2 hardware requires LSC for fence messages, even A-step */
+ if (devinfo->has_lsc)
+ gfx12_set_memory_fence_message(p, insn, sfid);
+ else
+ brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 64a714516ad..f0ec7dc839b 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4270,7 +4270,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_memory_barrier:
case nir_intrinsic_begin_invocation_interlock:
case nir_intrinsic_end_invocation_interlock: {
- bool l3_fence, slm_fence;
+ bool l3_fence, slm_fence, tgm_fence = false;
const enum opcode opcode =
instr->intrinsic == nir_intrinsic_begin_invocation_interlock ?
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
@@ -4282,6 +4282,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_var_mem_ssbo |
nir_var_mem_global);
slm_fence = modes & nir_var_mem_shared;
+
+ /* NIR currently doesn't have an image mode */
+ if (devinfo->has_lsc)
+ tgm_fence = modes & nir_var_mem_ssbo;
break;
}
@@ -4312,6 +4316,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier_shared;
+ tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image;
break;
}
@@ -4354,7 +4359,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
devinfo->ver >= 10; /* HSD ES # 1404612949 */
unsigned fence_regs_count = 0;
- fs_reg fence_regs[2] = {};
+ fs_reg fence_regs[3] = {};
const fs_builder ubld = bld.group(8, 0);
@@ -4364,8 +4369,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
- brw_imm_ud(/* bti */ 0));
- fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+ brw_imm_ud(0 /* BTI; ignored for LSC */));
+
+ fence->sfid = devinfo->has_lsc ?
+ GFX12_SFID_UGM :
+ GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
@@ -4380,6 +4388,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fence_regs[fence_regs_count++] = render_fence->dst;
}
+
+ /* Translate l3_fence into untyped and typed fence on XeHP */
+ if (devinfo->has_lsc && tgm_fence) {
+ fs_inst *fence =
+ ubld.emit(opcode,
+ ubld.vgrf(BRW_REGISTER_TYPE_UD),
+ brw_vec8_grf(0, 0),
+ brw_imm_ud(commit_enable),
+ brw_imm_ud(/* ignored */0));
+
+ fence->sfid = GFX12_SFID_TGM;
+ fence_regs[fence_regs_count++] = fence->dst;
+ }
}
if (slm_fence) {
@@ -4389,13 +4410,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
- brw_imm_ud(GFX7_BTI_SLM));
- fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
+ brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */));
+ if (devinfo->has_lsc)
+ fence->sfid = GFX12_SFID_SLM;
+ else
+ fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
}
- assert(fence_regs_count <= 2);
+ assert(fence_regs_count <= 3);
if (stall || fence_regs_count == 0) {
ubld.exec_all().group(1, 0).emit(
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index a9f93609561..fdffdfdaf83 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -939,6 +939,9 @@ namespace {
abort();
case GFX7_SFID_DATAPORT_DATA_CACHE:
+ case GFX12_SFID_SLM:
+ case GFX12_SFID_TGM:
+ case GFX12_SFID_UGM:
case HSW_SFID_DATAPORT_DATA_CACHE_1:
if (devinfo->ver >= 7)
return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,
More information about the mesa-commit
mailing list