[Beignet] [PATCH v2 6/8] [OCL20] gbe: atomic_long type support.
xionghu.luo at intel.com
xionghu.luo at intel.com
Tue Mar 1 11:35:11 UTC 2016
From: Luo Xionghu <xionghu.luo at intel.com>
As SLM doesn't support A64 stateless access, atomic_long can't be
fully supported yet; this patch just adds the code for future use.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/backend/gen8_encoder.cpp | 14 +++++----
backend/src/backend/gen8_encoder.hpp | 2 +-
backend/src/backend/gen_encoder.cpp | 2 +-
backend/src/backend/gen_encoder.hpp | 2 +-
backend/src/backend/gen_insn_selection.cpp | 47 +++++++++++++++++++++---------
backend/src/libocl/src/ocl_atomic.ll | 12 ++++++++
6 files changed, 57 insertions(+), 22 deletions(-)
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index d320290..9af8cee 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -169,14 +169,17 @@ namespace gbe
}
}
- unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+ unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) {
Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
uint32_t msg_length = 0;
uint32_t response_length = 0;
+ assert(srcNum <= 3);
if (this->curr.execWidth == 8) {
- msg_length = srcNum + 1;
- response_length = 1;
+ msg_length = srcNum + 1 + type_long;
+ if(srcNum == 3 && type_long)
+ msg_length++;
+ response_length = 1 + type_long;
} else if (this->curr.execWidth == 16) {
msg_length = 2 * (srcNum + 1);
response_length = 2;
@@ -189,7 +192,7 @@ namespace gbe
gen8_insn->bits3.gen8_atomic_a64.bti = bti;
gen8_insn->bits3.gen8_atomic_a64.return_data = 1;
gen8_insn->bits3.gen8_atomic_a64.aop_type = function;
- gen8_insn->bits3.gen8_atomic_a64.data_size = 0;
+ gen8_insn->bits3.gen8_atomic_a64.data_size = type_long;
return gen8_insn->bits3.ud;
}
@@ -203,7 +206,8 @@ namespace gbe
this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
this->setSrc1(insn, GenRegister::immud(0));
- setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum);
+ int type_long = (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) ? 1: 0;
+ setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum, type_long);
}
unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) {
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index 3e23df6..d83cde5 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -69,7 +69,7 @@ namespace gbe
GenRegister src1 = GenRegister::null());
virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null());
virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
- virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+ virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t accN);
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 564f207..9cdb41d 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -594,7 +594,7 @@ namespace gbe
NOT_SUPPORTED;
return insn->bits3.ud;
}
- unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) {
+ unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) {
GBE_ASSERT(0);
return 0;
}
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 5b4f4c2..ecb5051 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -241,7 +241,7 @@ namespace gbe
unsigned msg_length, unsigned response_length,
bool header_present = false, bool end_of_thread = false);
virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
- virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum);
+ virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long);
virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum);
unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 14a1930..f982817 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5582,28 +5582,46 @@ namespace gbe
sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
} else if (simdWidth == 16) {
vector<GenRegister> msgs;
+ RegisterFamily family = sel.getRegisterFamily(insn.getDst(0));
+ Type type = getType(family);
for (unsigned k = 0; k < msgPayload; k++) {
- msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
+ msgs.push_back(sel.selReg(sel.reg(family), type));
}
sel.push();
/* first quarter */
sel.curr.execWidth = 8;
sel.curr.quarterControl = GEN_COMPRESSION_Q1;
sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 0));
- if(msgPayload > 1)
- sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
- if(msgPayload > 2)
- sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
+ if(msgPayload > 1) {
+ if(family == ir::FAMILY_QWORD)
+ sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 0));
+ else
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0));
+ }
+ if(msgPayload > 2) {
+ if(family == ir::FAMILY_QWORD)
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 0));
+ else
+ sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0));
+ }
sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
/* second quarter */
sel.curr.execWidth = 8;
sel.curr.quarterControl = GEN_COMPRESSION_Q2;
sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 1));
- if(msgPayload > 1)
- sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
- if(msgPayload > 2)
- sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+ if(msgPayload > 1) {
+ if(family == ir::FAMILY_QWORD)
+ sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 1));
+ else
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1));
+ }
+ if(msgPayload > 2) {
+ if(family == ir::FAMILY_QWORD)
+ sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 1));
+ else
+ sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1));
+ }
sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM));
sel.pop();
}
@@ -5633,17 +5651,18 @@ namespace gbe
msgPayload = srcNum;
}
- GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32);
- GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32);
+ Type type = getType(sel.getRegisterFamily(insn.getDst(0)));
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+ GenRegister src0 = sel.selReg(insn.getAddressRegister(), type);
GenRegister src1 = src0, src2 = src0;
- if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
- if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
+ if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), type);
+ if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), type);
GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
if (AM == AM_DynamicBti || AM == AM_StaticBti) {
if (AM == AM_DynamicBti) {
Register btiReg = insn.getBtiReg();
- sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM));
+ sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, type), sel.getBTITemps(AM));
} else {
unsigned SI = insn.getSurfaceIndex();
sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, GenRegister::immud(SI), sel.getBTITemps(AM));
diff --git a/backend/src/libocl/src/ocl_atomic.ll b/backend/src/libocl/src/ocl_atomic.ll
index 6b789b3..38efac0 100644
--- a/backend/src/libocl/src/ocl_atomic.ll
+++ b/backend/src/libocl/src/ocl_atomic.ll
@@ -8,12 +8,24 @@ entry:
ret i32 %0
}
+define i32 @__gen_ocl_atomic_exchangef(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
define i32 @__gen_ocl_atomic_fetch_add32(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %value seq_cst
ret i32 %0
}
+define i32 @__gen_ocl_atomic_fetch_addf(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %value seq_cst
+ ret i32 %0
+}
+
define i32 @__gen_ocl_atomic_fetch_sub32(i32 addrspace(4)* nocapture %ptr, i32 %value, i32 %order, i32 %scope) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %value seq_cst
--
2.1.4
More information about the Beignet
mailing list