[Beignet] [PATCH] Add memory fence before barrier to support global memory barrier.
Yang Rong
rong.r.yang at intel.com
Mon Jun 17 00:13:05 PDT 2013
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen_context.cpp | 6 ++++++
backend/src/backend/gen_context.hpp | 1 +
backend/src/backend/gen_defs.hpp | 14 ++++++++++++++
backend/src/backend/gen_encoder.cpp | 9 +++++++++
backend/src/backend/gen_encoder.hpp | 2 ++
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_scheduling.cpp | 20 ++++++++++++++------
backend/src/backend/gen_insn_selection.cpp | 19 +++++++++++++++----
backend/src/backend/gen_insn_selection.hxx | 1 +
9 files changed, 63 insertions(+), 10 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 055c8fc..af651e7 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -193,6 +193,12 @@ namespace gbe
p->BARRIER(src);
}
+ void GenContext::emitFenceInstruction(const SelectionInstruction &insn) {
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ p->FENCE(dst);
+ p->MOV(dst, dst);
+ }
+
void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 7c28bdf..1566cbb 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -85,6 +85,7 @@ namespace gbe
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
void emitBarrierInstruction(const SelectionInstruction &insn);
+ void emitFenceInstruction(const SelectionInstruction &insn);
void emitMathInstruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index c7a1581..f4e4938 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -765,6 +765,20 @@ struct GenInstruction
uint32_t end_of_thread:1;
} gen7_typed_rw;
+ /*! Memory fence */
+ struct {
+ uint32_t bti:8;
+ uint32_t ingored:5;
+ uint32_t commit_enable:1;
+ uint32_t msg_type:4;
+ uint32_t pad2:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad3:2;
+ uint32_t end_of_thread:1;
+ } gen7_memory_fence;
+
struct {
uint32_t src1_subreg_nr_high:1;
uint32_t src1_reg_nr:8;
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b65cc94..859a1b9 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -707,6 +707,15 @@ namespace gbe
insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
insn->bits3.msg_gateway.notify = 0x1;
}
+ void GenEncoder::FENCE(GenRegister dst) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ this->setHeader(insn);
+ this->setDst(insn, dst);
+ this->setSrc0(insn, dst);
+ setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1);
+ insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE;
+ insn->bits3.gen7_memory_fence.commit_enable = 0x1;
+ }
void GenEncoder::JMPI(GenRegister src) {
alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 83d83d2..c98774f 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -118,6 +118,8 @@ namespace gbe
#undef ALU3
/*! Barrier message (to synchronize threads of a workgroup) */
void BARRIER(GenRegister src);
+ /*! Memory fence message (to order loads and stores between threads) */
+ void FENCE(GenRegister dst);
/*! Jump indexed instruction */
void JMPI(GenRegister src);
/*! Compare instructions */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a2c0fba..098d9ec 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -11,6 +11,7 @@ DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
DECL_GEN7_SCHEDULE(Math, 20, 4, 2)
DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1)
+DECL_GEN7_SCHEDULE(Fence, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1)
DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 95eedfe..cb990be 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -305,7 +305,7 @@ namespace gbe
return simdWidth == 8 ? physical.nr : physical.nr / 2;
}
// We use virtual registers since allocation is not done yet
- else
+ else
return reg.value.reg;
}
@@ -345,7 +345,9 @@ namespace gbe
}
// Consider barriers and wait write to memory
- if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ if (insn.opcode == SEL_OP_BARRIER ||
+ insn.opcode == SEL_OP_FENCE ||
+ insn.opcode == SEL_OP_WAIT) {
const uint32_t local = this->getIndex(0xfe);
const uint32_t global = this->getIndex(0x00);
this->nodes[local] = this->nodes[global] = node;
@@ -424,7 +426,9 @@ namespace gbe
}
// Consider barriers and wait are reading memory (local and global)
- if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ if (insn.opcode == SEL_OP_BARRIER ||
+ insn.opcode == SEL_OP_FENCE ||
+ insn.opcode == SEL_OP_WAIT) {
const uint32_t local = tracker.getIndex(0xfe);
const uint32_t global = tracker.getIndex(0x00);
tracker.addDependency(node, local);
@@ -450,7 +454,9 @@ namespace gbe
}
// Consider barriers and wait are writing memory (local and global)
- if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ if (insn.opcode == SEL_OP_BARRIER ||
+ insn.opcode == SEL_OP_FENCE ||
+ insn.opcode == SEL_OP_WAIT) {
const uint32_t local = tracker.getIndex(0xfe);
const uint32_t global = tracker.getIndex(0x00);
tracker.addDependency(node, local);
@@ -482,7 +488,9 @@ namespace gbe
}
// Consider barriers and wait are reading memory (local and global)
- if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+ if (insn.opcode == SEL_OP_BARRIER ||
+ insn.opcode == SEL_OP_FENCE ||
+ insn.opcode == SEL_OP_WAIT) {
const uint32_t local = tracker.getIndex(0xfe);
const uint32_t global = tracker.getIndex(0x00);
tracker.addDependency(local, node);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 88f9e94..4e7cebd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -431,6 +431,8 @@ namespace gbe
#undef ALU3
/*! Encode a barrier instruction */
void BARRIER(GenRegister src);
+ /*! Encode a barrier instruction */
+ void FENCE(GenRegister dst);
/*! Encode a label instruction */
void LABEL(ir::LabelIndex label);
/*! Jump indexed instruction */
@@ -682,6 +684,11 @@ namespace gbe
insn->src(0) = src;
}
+ void Selection::Opaque::FENCE(GenRegister dst) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0);
+ insn->dst(0) = dst;
+ }
+
void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
insn->src(0) = src;
@@ -1607,17 +1614,21 @@ namespace gbe
INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
{
using namespace ir;
- const uint32_t params = insn.getParameters();
- GBE_ASSERTM(params == syncLocalBarrier,
- "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
- "for the synchronization primitives");
const ir::Register reg = sel.reg(FAMILY_DWORD);
+ const uint32_t params = insn.getParameters();
+ //need to double check local barrier whether need fence or not
+ if(params == syncGlobalBarrier) {
+ const ir::Register fenceDst = sel.reg(FAMILY_DWORD);
+ sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32));
+ }
+
sel.push();
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.execWidth = 8;
sel.curr.physicalFlag = 0;
sel.curr.noMask = 1;
+
sel.SHL(GenRegister::ud8grf(reg),
GenRegister::ud1grf(ocl::threadn),
GenRegister::immud(0x9));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 455bb92..789c81c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -29,6 +29,7 @@ DECL_SELECTION_IR(NOP, NoOpInstruction)
DECL_SELECTION_IR(WAIT, WaitInstruction)
DECL_SELECTION_IR(MATH, MathInstruction)
DECL_SELECTION_IR(BARRIER, BarrierInstruction)
+DECL_SELECTION_IR(FENCE, FenceInstruction)
DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
--
1.7.10.4
More information about the Beignet
mailing list