[Beignet] [ocl2.0 1/4] GBE: add untyped A64 stateless message
Ruiling Song
ruiling.song at intel.com
Thu Oct 29 00:19:15 PDT 2015
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen8_context.cpp | 12 ++++
backend/src/backend/gen8_context.hpp | 2 +
backend/src/backend/gen8_encoder.cpp | 57 +++++++++++++++--
backend/src/backend/gen8_encoder.hpp | 2 +
backend/src/backend/gen8_instruction.hpp | 13 ++++
backend/src/backend/gen_context.cpp | 8 +++
backend/src/backend/gen_context.hpp | 2 +
backend/src/backend/gen_defs.hpp | 5 ++
backend/src/backend/gen_encoder.cpp | 8 +++
backend/src/backend/gen_encoder.hpp | 4 ++
.../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
backend/src/backend/gen_insn_selection.cpp | 71 +++++++++++++++++++---
backend/src/backend/gen_insn_selection.hxx | 2 +
13 files changed, 174 insertions(+), 14 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 83235c0..5974601 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -900,6 +900,18 @@ namespace gbe
p->pop();
}
}
+ void Gen8Context::emitUntypedReadA64Instruction(const SelectionInstruction &insn) {
+ // Resolve dst(0) and src(0) to registers, then let the encoder emit the A64 untyped read.
+ const GenRegister value = ra->genReg(insn.dst(0));
+ const GenRegister address = ra->genReg(insn.src(0));
+ p->UNTYPED_READA64(value, address, insn.extra.elem);
+ }
+
+ void Gen8Context::emitUntypedWriteA64Instruction(const SelectionInstruction &insn) {
+ // Resolve src(0) and hand it to the encoder as the message register of the A64 untyped write.
+ const GenRegister payload = ra->genReg(insn.src(0));
+ p->UNTYPED_WRITEA64(payload, insn.extra.elem);
+ }
void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn)
{
const uint32_t elemNum = insn.extra.elem;
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 84508e9..2029354 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -66,6 +66,8 @@ namespace gbe
virtual void emitFloatToI64Instruction(const SelectionInstruction &insn);
virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
+ virtual void emitUntypedWriteA64Instruction(const SelectionInstruction &insn);
+ virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
virtual void emitRead64Instruction(const SelectionInstruction &insn);
virtual void emitI64MULInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 69eabb2..f7999e5 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -72,13 +72,13 @@ namespace gbe
Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
setMessageDescriptor(insn, sfid, msg_length, response_length);
- gen8_insn->bits3.gen7_untyped_rw.msg_type = msg_type;
- gen8_insn->bits3.gen7_untyped_rw.bti = bti;
- gen8_insn->bits3.gen7_untyped_rw.rgba = rgba;
+ gen8_insn->bits3.gen8_untyped_rw_a64.msg_type = msg_type;
+ gen8_insn->bits3.gen8_untyped_rw_a64.bti = bti;
+ gen8_insn->bits3.gen8_untyped_rw_a64.rgba = rgba;
if (curr.execWidth == 8)
- gen8_insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
+ gen8_insn->bits3.gen8_untyped_rw_a64.simd_mode = GEN_UNTYPED_SIMD8;
else if (curr.execWidth == 16)
- gen8_insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
+ gen8_insn->bits3.gen8_untyped_rw_a64.simd_mode = GEN_UNTYPED_SIMD16;
else
NOT_SUPPORTED;
}
@@ -227,6 +227,53 @@ namespace gbe
this->setSrc1(insn, bti);
}
}
+ void Gen8Encoder::UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum) {
+ // Encode a SEND with a GEN8_P1_UNTYPED_READ_A64 message (stateless, bti 255) reading elemNum channels into dst.
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); // both bounds must hold: elemNum indexes untypedRWMask
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ assert(this->curr.execWidth == 8);
+ if (this->curr.execWidth == 8) {
+ msg_length = 2;
+ response_length = elemNum;
+ } else
+ NOT_IMPLEMENTED;
+ // Only the register numbers of dst and src are used for the operands; src1 is an immediate 0.
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ 255, // stateless bti
+ untypedRWMask[elemNum],
+ GEN8_P1_UNTYPED_READ_A64,
+ msg_length,
+ response_length);
+ }
+
+ void Gen8Encoder::UNTYPED_WRITEA64(GenRegister msg, uint32_t elemNum) {
+ // Encode a SEND with a GEN8_P1_UNTYPED_WRITE_A64 message (stateless, bti 255) writing elemNum channels from msg.
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+ assert(elemNum >= 1 && elemNum <= 4); // both bounds must hold: elemNum indexes untypedRWMask
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0; // never changed below: the destination is the null register
+ this->setHeader(insn);
+ if (this->curr.execWidth == 8) {
+ this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ msg_length = 2 + elemNum;
+ } else
+ NOT_IMPLEMENTED;
+ this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenRegister::immud(0));
+ setDPUntypedRW(insn,
+ 255, //stateless bti
+ untypedRWMask[elemNum],
+ GEN8_P1_UNTYPED_WRITE_A64,
+ msg_length,
+ response_length);
+ }
+
void Gen8Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value) {
union { double d; unsigned u[2]; } u;
u.d = value;
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index 504e13d..b7d900f 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -52,6 +52,8 @@ namespace gbe
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum);
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+ virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
+ virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
virtual void setHeader(GenNativeInstruction *insn);
virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, uint32_t rgba,
uint32_t msg_type, uint32_t msg_length, uint32_t response_length);
diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp
index 5cf1032..50d9aca 100644
--- a/backend/src/backend/gen8_instruction.hpp
+++ b/backend/src/backend/gen8_instruction.hpp
@@ -492,6 +492,19 @@ union Gen8NativeInstruction
uint32_t end_of_thread:1;
} gen7_atomic_op;
+ // gen8 untyped read/write message descriptor (A64-capable); the field widths below add up to 32 bits
+ struct {
+ uint32_t bti:8;
+ uint32_t rgba:4;
+ uint32_t simd_mode:2;
+ uint32_t msg_type:5; // 5 bits: GEN8_P1_UNTYPED_WRITE_A64 is 25 and would not fit in 4
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen8_untyped_rw_a64;
+
struct {
uint32_t src1_subreg_nr_high:1;
uint32_t src1_reg_nr:8;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index baf3897..2ed3087 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2208,6 +2208,14 @@ namespace gbe
}
+ void GenContext::emitUntypedReadA64Instruction(const SelectionInstruction &insn) {
+ assert(0); // not implemented here; Gen8Context provides the real emitter. NOTE(review): assert compiles out under NDEBUG
+ }
+
+ void GenContext::emitUntypedWriteA64Instruction(const SelectionInstruction &insn) {
+ assert(0); // not implemented here; Gen8Context provides the real emitter. NOTE(review): assert compiles out under NDEBUG
+ }
+
void GenContext::emitUnpackByteInstruction(const SelectionInstruction &insn) {
const GenRegister src = ra->genReg(insn.src(0));
for(uint32_t i = 0; i < insn.dstNum; i++) {
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 155b68e..5e06d0b 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -158,6 +158,8 @@ namespace gbe
virtual void emitWrite64Instruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
+ virtual void emitUntypedReadA64Instruction(const SelectionInstruction &insn);
+ virtual void emitUntypedWriteA64Instruction(const SelectionInstruction &insn);
void emitAtomicInstruction(const SelectionInstruction &insn);
void emitByteGatherInstruction(const SelectionInstruction &insn);
void emitByteScatterInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 1ca148c..b6aa5ab 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -355,6 +355,11 @@ enum GenMessageTarget {
#define GEN75_P1_ATOMIC_COUNTER_4X2 12 //1100: Atomic Counter Operation 4X2
#define GEN75_P1_TYPED_SURFACE_WRITE 13 //1101: Typed Surface Write
+#define GEN8_P1_BYTE_GATHER_A64 16 //10000: A64 Byte Gather
+#define GEN8_P1_UNTYPED_READ_A64 17 //10001: A64 Untyped Read
+#define GEN8_P1_UNTYPED_WRITE_A64 25 //11001: A64 Untyped Write
+#define GEN8_P1_BYTE_SCATTER_A64 26 //11010: A64 Byte Scatter
+
/* Data port data cache scratch messages*/
#define GEN_SCRATCH_READ 0
#define GEN_SCRATCH_WRITE 1
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index cac29e8..3e80271 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -397,6 +397,14 @@ namespace gbe
return insn->bits3.ud;
}
+ void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum) {
+ assert(0); // not implemented here; Gen8Encoder provides the real encoding. NOTE(review): assert compiles out under NDEBUG
+ }
+
+ void GenEncoder::UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum){
+ assert(0); // not implemented here; Gen8Encoder provides the real encoding. NOTE(review): assert compiles out under NDEBUG
+ }
+
void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t elemNum) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 79e7b6e..341f431 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -174,6 +174,10 @@ namespace gbe
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum);
/*! Untyped write (upto 4 channels) */
virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum);
+ /*! Untyped A64 stateless read (up to 4 channels) */
+ virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum);
+ /*! Untyped A64 stateless write (up to 4 channels) */
+ virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
/*! Byte gather (for unaligned bytes, shorts and ints) */
void BYTE_GATHER(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemSize);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index d073770..1ef8f5f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -28,6 +28,8 @@ DECL_GEN7_SCHEDULE(Read64, 80, 1, 1)
DECL_GEN7_SCHEDULE(Write64, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedRead, 160, 1, 1)
DECL_GEN7_SCHEDULE(UntypedWrite, 160, 1, 1)
+DECL_GEN7_SCHEDULE(UntypedReadA64, 160, 1, 1)
+DECL_GEN7_SCHEDULE(UntypedWriteA64, 160, 1, 1)
DECL_GEN7_SCHEDULE(ByteGather, 160, 1, 1)
DECL_GEN7_SCHEDULE(ByteScatter, 160, 1, 1)
DECL_GEN7_SCHEDULE(DWordGather, 160, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 259c3cf..fef0e00 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -182,11 +182,12 @@ namespace gbe
}
bool SelectionInstruction::isRead(void) const {
- return this->opcode == SEL_OP_UNTYPED_READ ||
- this->opcode == SEL_OP_READ64 ||
- this->opcode == SEL_OP_ATOMIC ||
- this->opcode == SEL_OP_BYTE_GATHER ||
- this->opcode == SEL_OP_SAMPLE ||
+ return this->opcode == SEL_OP_UNTYPED_READ ||
+ this->opcode == SEL_OP_UNTYPED_READA64 ||
+ this->opcode == SEL_OP_READ64 ||
+ this->opcode == SEL_OP_ATOMIC ||
+ this->opcode == SEL_OP_BYTE_GATHER ||
+ this->opcode == SEL_OP_SAMPLE ||
this->opcode == SEL_OP_DWORD_GATHER;
}
@@ -205,10 +206,11 @@ namespace gbe
}
bool SelectionInstruction::isWrite(void) const {
- return this->opcode == SEL_OP_UNTYPED_WRITE ||
- this->opcode == SEL_OP_WRITE64 ||
- this->opcode == SEL_OP_ATOMIC ||
- this->opcode == SEL_OP_BYTE_SCATTER ||
+ return this->opcode == SEL_OP_UNTYPED_WRITE ||
+ this->opcode == SEL_OP_UNTYPED_WRITEA64 ||
+ this->opcode == SEL_OP_WRITE64 ||
+ this->opcode == SEL_OP_ATOMIC ||
+ this->opcode == SEL_OP_BYTE_SCATTER ||
this->opcode == SEL_OP_TYPED_WRITE;
}
@@ -633,6 +635,10 @@ namespace gbe
void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti, vector<GenRegister> temps);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti, vector <GenRegister> temps);
+ /*! Untyped A64 read (up to 4 elements) */
+ void UNTYPED_READA64(Reg addr, const GenRegister *dst, uint32_t dstNum, uint32_t elemNum);
+ /*! Untyped A64 write (up to 4 elements) */
+ void UNTYPED_WRITEA64(const GenRegister *msgs, uint32_t msgNum, uint32_t elemNum);
/*! DWord scatter (for constant cache read) */
void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
/*! Unpack the uint to charN */
@@ -1391,6 +1397,34 @@ namespace gbe
srcVector->offsetID = 0;
srcVector->reg = &insn->src(0);
}
+ void Selection::Opaque::UNTYPED_READA64(Reg addr,
+ const GenRegister *dst,
+ uint32_t dstNum,
+ uint32_t elemNum)
+ {
+ // dstNum destinations and one address source; the source vector is appended first
+ SelectionInstruction *read = this->appendInsn(SEL_OP_UNTYPED_READA64, dstNum, 1);
+ SelectionVector *addrVec = this->appendVector();
+ SelectionVector *dataVec = this->appendVector();
+ // A scalar first destination sets noMask on the instruction
+ if (this->isScalarReg(dst[0].reg()))
+ read->state.noMask = 1;
+ // Fill in the operands of the instruction
+ for (uint32_t i = 0; i != dstNum; ++i)
+ read->dst(i) = dst[i];
+ read->src(0) = addr;
+ read->extra.elem = elemNum;
+ // The send needs its destinations allocated contiguously
+ dataVec->reg = &read->dst(0);
+ dataVec->regNum = dstNum;
+ dataVec->offsetID = 0;
+ dataVec->isSrc = 0;
+ // The address is registered as a source vector of one register
+ addrVec->reg = &read->src(0);
+ addrVec->regNum = 1;
+ addrVec->offsetID = 0;
+ addrVec->isSrc = 1;
+ }
void Selection::Opaque::WRITE64(Reg addr,
const GenRegister *src,
@@ -1500,6 +1534,25 @@ namespace gbe
vector->isSrc = 1;
}
+ void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src,
+ uint32_t msgNum,
+ uint32_t elemNum)
+ {
+ // No destination: all msgNum message registers are sources
+ SelectionInstruction *write = this->appendInsn(SEL_OP_UNTYPED_WRITEA64, 0, msgNum);
+ SelectionVector *payload = this->appendVector();
+
+ for (uint32_t i = 0; i != msgNum; ++i)
+ write->src(i) = src[i];
+ write->extra.elem = elemNum;
+
+ // The send needs the whole message allocated as one contiguous source vector
+ payload->isSrc = 1;
+ payload->offsetID = 0;
+ payload->reg = &write->src(0);
+ payload->regNum = msgNum;
+ }
+
void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr,
uint32_t elemSize,
GenRegister bti,
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index adbb137..bbccc54 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -55,6 +55,8 @@ DECL_SELECTION_IR(BARRIER, BarrierInstruction)
DECL_SELECTION_IR(FENCE, FenceInstruction)
DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
+DECL_SELECTION_IR(UNTYPED_READA64, UntypedReadA64Instruction)
+DECL_SELECTION_IR(UNTYPED_WRITEA64, UntypedWriteA64Instruction)
DECL_SELECTION_IR(READ64, Read64Instruction)
DECL_SELECTION_IR(WRITE64, Write64Instruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
--
2.3.1
More information about the Beignet
mailing list