[Beignet] [PATCH] Add memory fence before barrier to support global memory barrier.

Yang Rong rong.r.yang at intel.com
Mon Jun 17 00:13:05 PDT 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/backend/gen_context.cpp                |    6 ++++++
 backend/src/backend/gen_context.hpp                |    1 +
 backend/src/backend/gen_defs.hpp                   |   14 ++++++++++++++
 backend/src/backend/gen_encoder.cpp                |    9 +++++++++
 backend/src/backend/gen_encoder.hpp                |    2 ++
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |    1 +
 backend/src/backend/gen_insn_scheduling.cpp        |   20 ++++++++++++++------
 backend/src/backend/gen_insn_selection.cpp         |   19 +++++++++++++++----
 backend/src/backend/gen_insn_selection.hxx         |    1 +
 9 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 055c8fc..af651e7 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -193,6 +193,12 @@ namespace gbe
     p->BARRIER(src);
   }
 
+  void GenContext::emitFenceInstruction(const SelectionInstruction &insn) { // Emit the code for a fence selection instruction
+    const GenRegister dst = ra->genReg(insn.dst(0)); // register mapped to the instruction's first destination
+    p->FENCE(dst); // encode the fence message, using dst as its register operand
+    p->MOV(dst, dst); // NOTE(review): self-move presumably reads dst so that execution waits for the fence response - confirm
+  }
+
   void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 7c28bdf..1566cbb 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -85,6 +85,7 @@ namespace gbe
     void emitNoOpInstruction(const SelectionInstruction &insn);
     void emitWaitInstruction(const SelectionInstruction &insn);
     void emitBarrierInstruction(const SelectionInstruction &insn);
+    void emitFenceInstruction(const SelectionInstruction &insn);
     void emitMathInstruction(const SelectionInstruction &insn);
     void emitUntypedReadInstruction(const SelectionInstruction &insn);
     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index c7a1581..f4e4938 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -765,6 +765,20 @@ struct GenInstruction
       uint32_t end_of_thread:1;
     } gen7_typed_rw;
 
+    /*! Memory fence (the bit-fields below add up to 32 bits) */
+    struct {
+      uint32_t bti:8;
+      uint32_t ignored:5;
+      uint32_t commit_enable:1; // set to 1 by GenEncoder::FENCE
+      uint32_t msg_type:4; // set to GEN_MEM_FENCE by GenEncoder::FENCE
+      uint32_t pad2:1;
+      uint32_t header_present:1;
+      uint32_t response_length:5;
+      uint32_t msg_length:4;
+      uint32_t pad3:2;
+      uint32_t end_of_thread:1;
+    } gen7_memory_fence;
+
     struct {
       uint32_t src1_subreg_nr_high:1;
       uint32_t src1_reg_nr:8;
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b65cc94..859a1b9 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -707,6 +707,15 @@ namespace gbe
      insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
      insn->bits3.msg_gateway.notify = 0x1;
   }
+  void GenEncoder::FENCE(GenRegister dst) { // Encode a memory fence as a SEND message to the data cache data port
+    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    this->setHeader(insn);
+    this->setDst(insn, dst);
+    this->setSrc0(insn, dst); // dst is used as the source operand of the message as well
+    setMessageDescriptor(this, insn, GEN_SFID_DATAPORT_DATA_CACHE, 1, 1, 1); // NOTE(review): the three 1s are presumably message length, response length and header-present - confirm
+    insn->bits3.gen7_memory_fence.msg_type = GEN_MEM_FENCE; // overlay the fence-specific fields on the descriptor
+    insn->bits3.gen7_memory_fence.commit_enable = 0x1; // NOTE(review): presumably requests a commit write-back into dst - confirm
+  }
 
   void GenEncoder::JMPI(GenRegister src) {
     alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 83d83d2..c98774f 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -118,6 +118,8 @@ namespace gbe
 #undef ALU3
     /*! Barrier message (to synchronize threads of a workgroup) */
     void BARRIER(GenRegister src);
+    /*! Memory fence message (to order loads and stores between threads) */
+    void FENCE(GenRegister dst);
     /*! Jump indexed instruction */
     void JMPI(GenRegister src);
     /*! Compare instructions */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index a2c0fba..098d9ec 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -11,6 +11,7 @@ DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
 DECL_GEN7_SCHEDULE(Wait,            20,        2,        2)
 DECL_GEN7_SCHEDULE(Math,            20,        4,        2)
 DECL_GEN7_SCHEDULE(Barrier,         80,        1,        1)
+DECL_GEN7_SCHEDULE(Fence,           80,        1,        1)
 DECL_GEN7_SCHEDULE(UntypedRead,     80,        1,        1)
 DECL_GEN7_SCHEDULE(UntypedWrite,    80,        1,        1)
 DECL_GEN7_SCHEDULE(ByteGather,      80,        1,        1)
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 95eedfe..cb990be 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -305,7 +305,7 @@ namespace gbe
       return simdWidth == 8 ? physical.nr : physical.nr / 2;
     }
     // We use virtual registers since allocation is not done yet
-    else 
+    else
       return reg.value.reg;
   }
 
@@ -345,7 +345,9 @@ namespace gbe
     }
 
     // Consider barriers and wait write to memory
-    if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+    if (insn.opcode == SEL_OP_BARRIER ||
+        insn.opcode == SEL_OP_FENCE ||
+        insn.opcode == SEL_OP_WAIT) {
       const uint32_t local = this->getIndex(0xfe);
       const uint32_t global = this->getIndex(0x00);
       this->nodes[local] = this->nodes[global] = node;
@@ -424,7 +426,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are reading memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+      if (insn.opcode == SEL_OP_BARRIER ||
+          insn.opcode == SEL_OP_FENCE ||
+          insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(node, local);
@@ -450,7 +454,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are writing memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+      if (insn.opcode == SEL_OP_BARRIER ||
+          insn.opcode == SEL_OP_FENCE ||
+          insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(node, local);
@@ -482,7 +488,9 @@ namespace gbe
       }
 
       // Consider barriers and wait are reading memory (local and global)
-      if (insn.opcode == SEL_OP_BARRIER || insn.opcode == SEL_OP_WAIT) {
+      if (insn.opcode == SEL_OP_BARRIER ||
+          insn.opcode == SEL_OP_FENCE ||
+          insn.opcode == SEL_OP_WAIT) {
         const uint32_t local = tracker.getIndex(0xfe);
         const uint32_t global = tracker.getIndex(0x00);
         tracker.addDependency(local, node);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 88f9e94..4e7cebd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -431,6 +431,8 @@ namespace gbe
 #undef ALU3
     /*! Encode a barrier instruction */
     void BARRIER(GenRegister src);
+    /*! Encode a memory fence instruction */
+    void FENCE(GenRegister dst);
     /*! Encode a label instruction */
     void LABEL(ir::LabelIndex label);
     /*! Jump indexed instruction */
@@ -682,6 +684,11 @@ namespace gbe
     insn->src(0) = src;
   }
 
+  void Selection::Opaque::FENCE(GenRegister dst) { // Append a SEL_OP_FENCE selection instruction writing to dst
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_FENCE, 1, 0); // NOTE(review): 1 and 0 are presumably the destination and source counts - confirm
+    insn->dst(0) = dst; // the only operand is the destination register
+  }
+
   void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
     insn->src(0) = src;
@@ -1607,17 +1614,21 @@ namespace gbe
     INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
     {
       using namespace ir;
-      const uint32_t params = insn.getParameters();
-      GBE_ASSERTM(params == syncLocalBarrier,
-                  "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
-                  "for the synchronization primitives");
       const ir::Register reg = sel.reg(FAMILY_DWORD);
 
+      const uint32_t params = insn.getParameters();
+      // TODO: double-check whether a local barrier also needs a fence
+      if(params == syncGlobalBarrier) {
+        const ir::Register fenceDst = sel.reg(FAMILY_DWORD);
+        sel.FENCE(sel.selReg(fenceDst, ir::TYPE_U32));
+      }
+
       sel.push();
         sel.curr.predicate = GEN_PREDICATE_NONE;
         sel.curr.execWidth = 8;
         sel.curr.physicalFlag = 0;
         sel.curr.noMask = 1;
+
         sel.SHL(GenRegister::ud8grf(reg),
                 GenRegister::ud1grf(ocl::threadn),
                 GenRegister::immud(0x9));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 455bb92..789c81c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -29,6 +29,7 @@ DECL_SELECTION_IR(NOP, NoOpInstruction)
 DECL_SELECTION_IR(WAIT, WaitInstruction)
 DECL_SELECTION_IR(MATH, MathInstruction)
 DECL_SELECTION_IR(BARRIER, BarrierInstruction)
+DECL_SELECTION_IR(FENCE, FenceInstruction)
 DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
 DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
 DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
-- 
1.7.10.4



More information about the Beignet mailing list