[Beignet] [PATCH v2 6/8] [OCL20] gbe: atomic_long type support.

xionghu.luo at intel.com xionghu.luo at intel.com
Tue Mar 1 11:35:11 UTC 2016


From: Luo Xionghu <xionghu.luo at intel.com>

As SLM doesn't support A64 stateless access, atomic_long cannot be fully
supported yet; this patch just adds the code for future use.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 backend/src/backend/gen8_encoder.cpp       | 14 +++++----
 backend/src/backend/gen8_encoder.hpp       |  2 +-
 backend/src/backend/gen_encoder.cpp        |  2 +-
 backend/src/backend/gen_encoder.hpp        |  2 +-
 backend/src/backend/gen_insn_selection.cpp | 47 +++++++++++++++++++++---------
 backend/src/libocl/src/ocl_atomic.ll       | 12 ++++++++
 6 files changed, 57 insertions(+), 22 deletions(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index d320290..9af8cee 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -169,14 +169,17 @@ namespace gbe
     }
   }
 
-  unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+  unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) {
     Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
+    assert(srcNum <= 3);
 
     if (this->curr.execWidth == 8) {
-      msg_length = srcNum + 1;
-      response_length = 1;
+      msg_length = srcNum + 1 + type_long;
+      if(srcNum == 3 && type_long)
+        msg_length++;
+      response_length = 1 + type_long;
     } else if (this->curr.execWidth == 16) {
       msg_length = 2 * (srcNum + 1);
       response_length = 2;
@@ -189,7 +192,7 @@ namespace gbe
     gen8_insn->bits3.gen8_atomic_a64.bti = bti;
     gen8_insn->bits3.gen8_atomic_a64.return_data = 1;
     gen8_insn->bits3.gen8_atomic_a64.aop_type = function;
-    gen8_insn->bits3.gen8_atomic_a64.data_size = 0;
+    gen8_insn->bits3.gen8_atomic_a64.data_size = type_long;
 
     return gen8_insn->bits3.ud;
   }
@@ -203,7 +206,8 @@ namespace gbe
     this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
     this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
     this->setSrc1(insn, GenRegister::immud(0));
-    setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
+    int type_long = (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) ? 1: 0;
+    setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum, type_long);
   }
 
   unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) {
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index 3e23df6..d83cde5 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -69,7 +69,7 @@ namespace gbe
                             GenRegister src1 = GenRegister::null());
     virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());
     virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
-    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
     virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t accN);
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 564f207..9cdb41d 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -594,7 +594,7 @@ namespace gbe
       NOT_SUPPORTED;
     return insn->bits3.ud;
   }
-  unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+  unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) {
     GBE_ASSERT(0);
     return 0;
   }
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 5b4f4c2..ecb5051 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -241,7 +241,7 @@ namespace gbe
                               unsigned msg_length, unsigned response_length,
                               bool header_present = false, bool end_of_thread = false);
     virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
-    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+    virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
     virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
     unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 14a1930..f982817 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5582,28 +5582,46 @@ namespace gbe
         sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
       } else if (simdWidth == 16) {
         vector<GenRegister> msgs;
+        RegisterFamily family = sel.getRegisterFamily(insn.getDst(0));
+        Type type = getType(family);
         for (unsigned k = 0; k < msgPayload; k++) {
-          msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
+          msgs.push_back(sel.selReg(sel.reg(family), type));
         }
         sel.push();
         /* first quarter */
         sel.curr.execWidth = 8;
         sel.curr.quarterControl = GEN_COMPRESSION_Q1;
         sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 0));
-        if(msgPayload > 1)
-          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
-        if(msgPayload > 2)
-          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
+        if(msgPayload > 1) {
+          if(family == ir::FAMILY_QWORD)
+            sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 0));
+          else
+            sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
+        }
+        if(msgPayload > 2) {
+          if(family == ir::FAMILY_QWORD)
+            sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 0));
+          else
+            sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
+        }
         sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
 
         /* second quarter */
         sel.curr.execWidth = 8;
         sel.curr.quarterControl = GEN_COMPRESSION_Q2;
         sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 1));
-        if(msgPayload > 1)
-          sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
-        if(msgPayload > 2)
-          sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+        if(msgPayload > 1) {
+          if(family == ir::FAMILY_QWORD)
+            sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 1));
+          else
+            sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
+        }
+        if(msgPayload > 2) {
+          if(family == ir::FAMILY_QWORD)
+            sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 1));
+          else
+            sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+        }
         sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
         sel.pop();
       }
@@ -5633,17 +5651,18 @@ namespace gbe
         msgPayload = srcNum;
       }
 
-      GenRegister dst  = sel.selReg(insn.getDst(0), TYPE_U32);
-      GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
+      Type type = getType(sel.getRegisterFamily(insn.getDst(0)));
+      GenRegister dst  = sel.selReg(insn.getDst(0), type);
+      GenRegister src0 = sel.selReg(insn.getAddressRegister(), type);
       GenRegister src1 = src0, src2 = src0;
-      if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
-      if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
+      if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), type);
+      if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), type);
 
       GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
       if (AM == AM_DynamicBti || AM == AM_StaticBti) {
         if (AM == AM_DynamicBti) {
           Register btiReg = insn.getBtiReg();
-          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM));
+          sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, type), sel.getBTITemps(AM));
         } else {
           unsigned SI = insn.getSurfaceIndex();
           sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, GenRegister::immud(SI), sel.getBTITemps(AM));
diff --git a/backend/src/libocl/src/ocl_atomic.ll b/backend/src/libocl/src/ocl_atomic.ll
index 6b789b3..38efac0 100644
--- a/backend/src/libocl/src/ocl_atomic.ll
+++ b/backend/src/libocl/src/ocl_atomic.ll
@@ -8,12 +8,24 @@ entry:
     ret i32 %0
 }
 
+define i32 @__gen_ocl_atomic_exchangef(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
+entry:
+    %0 = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %value seq_cst
+    ret i32 %0
+}
+
 define i32 @__gen_ocl_atomic_fetch_add32(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
 entry:
     %0 = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %value seq_cst
     ret i32 %0
 }
 
+define i32 @__gen_ocl_atomic_fetch_addf(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
+entry:
+    %0 = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %value seq_cst
+    ret i32 %0
+}
+
 define i32 @__gen_ocl_atomic_fetch_sub32(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
 entry:
     %0 = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %value seq_cst
-- 
2.1.4



More information about the Beignet mailing list