[Beignet] [ocl2.0 1/4] GBE: add untyped A64 stateless message
Yang, Rong R
rong.r.yang at intel.com
Mon Nov 9 18:14:53 PST 2015
The patchset LGTM, thanks. Pushed.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Thursday, October 29, 2015 15:19
> To: beignet at lists.freedesktop.org
> Cc: Song, Ruiling
> Subject: [Beignet] [ocl2.0 1/4] GBE: add untyped A64 stateless message
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/backend/gen8_context.cpp | 12 ++++
> backend/src/backend/gen8_context.hpp | 2 +
> backend/src/backend/gen8_encoder.cpp | 57 +++++++++++++++--
> backend/src/backend/gen8_encoder.hpp | 2 +
> backend/src/backend/gen8_instruction.hpp | 13 ++++
> backend/src/backend/gen_context.cpp | 8 +++
> backend/src/backend/gen_context.hpp | 2 +
> backend/src/backend/gen_defs.hpp | 5 ++
> backend/src/backend/gen_encoder.cpp | 8 +++
> backend/src/backend/gen_encoder.hpp | 4 ++
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 2 +
> backend/src/backend/gen_insn_selection.cpp | 71 +++++++++++++++++++---
> backend/src/backend/gen_insn_selection.hxx | 2 +
> 13 files changed, 174 insertions(+), 14 deletions(-)
>
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 83235c0..5974601 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -900,6 +900,18 @@ namespace gbe
> p->pop();
> }
> }
> + void Gen8Context::emitUntypedReadA64Instruction(const
> SelectionInstruction &insn) {
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister src = ra->genReg(insn.src(0));
> + const uint32_t elemNum = insn.extra.elem;
> + p->UNTYPED_READA64(dst, src, elemNum); }
> +
> + void Gen8Context::emitUntypedWriteA64Instruction(const
> SelectionInstruction &insn) {
> + const GenRegister src = ra->genReg(insn.src(0));
> + const uint32_t elemNum = insn.extra.elem;
> + p->UNTYPED_WRITEA64(src, elemNum);
> + }
> void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn)
> {
> const uint32_t elemNum = insn.extra.elem;
> diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
> index 84508e9..2029354 100644
> --- a/backend/src/backend/gen8_context.hpp
> +++ b/backend/src/backend/gen8_context.hpp
> @@ -66,6 +66,8 @@ namespace gbe
> virtual void emitFloatToI64Instruction(const SelectionInstruction &insn);
> virtual void emitI64MADSATInstruction(const SelectionInstruction &insn);
>
> + virtual void emitUntypedWriteA64Instruction(const SelectionInstruction
> &insn);
> + virtual void emitUntypedReadA64Instruction(const
> + SelectionInstruction &insn);
> virtual void emitWrite64Instruction(const SelectionInstruction &insn);
> virtual void emitRead64Instruction(const SelectionInstruction &insn);
> virtual void emitI64MULInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
> index 69eabb2..f7999e5 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -72,13 +72,13 @@ namespace gbe
> Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
> const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA;
> setMessageDescriptor(insn, sfid, msg_length, response_length);
> - gen8_insn->bits3.gen7_untyped_rw.msg_type = msg_type;
> - gen8_insn->bits3.gen7_untyped_rw.bti = bti;
> - gen8_insn->bits3.gen7_untyped_rw.rgba = rgba;
> + gen8_insn->bits3.gen8_untyped_rw_a64.msg_type = msg_type;
> + gen8_insn->bits3.gen8_untyped_rw_a64.bti = bti;
> + gen8_insn->bits3.gen8_untyped_rw_a64.rgba = rgba;
> if (curr.execWidth == 8)
> - gen8_insn->bits3.gen7_untyped_rw.simd_mode =
> GEN_UNTYPED_SIMD8;
> + gen8_insn->bits3.gen8_untyped_rw_a64.simd_mode =
> + GEN_UNTYPED_SIMD8;
> else if (curr.execWidth == 16)
> - gen8_insn->bits3.gen7_untyped_rw.simd_mode =
> GEN_UNTYPED_SIMD16;
> + gen8_insn->bits3.gen8_untyped_rw_a64.simd_mode =
> + GEN_UNTYPED_SIMD16;
> else
> NOT_SUPPORTED;
> }
> @@ -227,6 +227,53 @@ namespace gbe
> this->setSrc1(insn, bti);
> }
> }
> + void Gen8Encoder::UNTYPED_READA64(GenRegister dst, GenRegister src,
> uint32_t elemNum) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + assert(elemNum >= 1 || elemNum <= 4);
> + uint32_t msg_length = 0;
> + uint32_t response_length = 0;
> + assert(this->curr.execWidth == 8);
> +
> + if (this->curr.execWidth == 8) {
> + msg_length = 2;
> + response_length = elemNum;
> + } else
> + NOT_IMPLEMENTED;
> +
> + this->setHeader(insn);
> + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
> + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setDPUntypedRW(insn,
> + 255, // stateless bti
> + untypedRWMask[elemNum],
> + GEN8_P1_UNTYPED_READ_A64,
> + msg_length,
> + response_length);
> + }
> +
> + void Gen8Encoder::UNTYPED_WRITEA64(GenRegister msg, uint32_t
> elemNum) {
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> + assert(elemNum >= 1 || elemNum <= 4);
> + uint32_t msg_length = 0;
> + uint32_t response_length = 0;
> + this->setHeader(insn);
> + if (this->curr.execWidth == 8) {
> + this->setDst(insn, GenRegister::retype(GenRegister::null(),
> GEN_TYPE_UD));
> + msg_length = 2 + elemNum;
> + } else
> + NOT_IMPLEMENTED;
> +
> + this->setSrc0(insn, GenRegister::ud8grf(msg.nr, 0));
> + this->setSrc1(insn, GenRegister::immud(0));
> + setDPUntypedRW(insn,
> + 255, //stateless bti
> + untypedRWMask[elemNum],
> + GEN8_P1_UNTYPED_WRITE_A64,
> + msg_length,
> + response_length);
> + }
> +
> void Gen8Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp,
> double value) {
> union { double d; unsigned u[2]; } u;
> u.d = value;
> diff --git a/backend/src/backend/gen8_encoder.hpp
> b/backend/src/backend/gen8_encoder.hpp
> index 504e13d..b7d900f 100644
> --- a/backend/src/backend/gen8_encoder.hpp
> +++ b/backend/src/backend/gen8_encoder.hpp
> @@ -52,6 +52,8 @@ namespace gbe
> virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src,
> GenRegister bti, uint32_t srcNum);
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src,
> GenRegister bti, uint32_t elemNum);
> virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> + virtual void UNTYPED_READA64(GenRegister dst, GenRegister src,
> uint32_t elemNum);
> + virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
> virtual void setHeader(GenNativeInstruction *insn);
> virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti,
> uint32_t rgba,
> uint32_t msg_type, uint32_t msg_length, uint32_t
> response_length);
> diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp
> index 5cf1032..50d9aca 100644
> --- a/backend/src/backend/gen8_instruction.hpp
> +++ b/backend/src/backend/gen8_instruction.hpp
> @@ -492,6 +492,19 @@ union Gen8NativeInstruction
> uint32_t end_of_thread:1;
> } gen7_atomic_op;
>
> + // gen8 untyped read/write
> + struct {
> + uint32_t bti:8;
> + uint32_t rgba:4;
> + uint32_t simd_mode:2;
> + uint32_t msg_type:5;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen8_untyped_rw_a64;
> +
> struct {
> uint32_t src1_subreg_nr_high:1;
> uint32_t src1_reg_nr:8;
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index baf3897..2ed3087 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2208,6 +2208,14 @@ namespace gbe
>
> }
>
> + void GenContext::emitUntypedReadA64Instruction(const
> SelectionInstruction &insn) {
> + assert(0);
> + }
> +
> + void GenContext::emitUntypedWriteA64Instruction(const
> SelectionInstruction &insn) {
> + assert(0);
> + }
> +
> void GenContext::emitUnpackByteInstruction(const SelectionInstruction
> &insn) {
> const GenRegister src = ra->genReg(insn.src(0));
> for(uint32_t i = 0; i < insn.dstNum; i++) {
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 155b68e..5e06d0b 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -158,6 +158,8 @@ namespace gbe
> virtual void emitWrite64Instruction(const SelectionInstruction &insn);
> void emitUntypedReadInstruction(const SelectionInstruction &insn);
> void emitUntypedWriteInstruction(const SelectionInstruction &insn);
> + virtual void emitUntypedReadA64Instruction(const SelectionInstruction
> &insn);
> + virtual void emitUntypedWriteA64Instruction(const
> + SelectionInstruction &insn);
> void emitAtomicInstruction(const SelectionInstruction &insn);
> void emitByteGatherInstruction(const SelectionInstruction &insn);
> void emitByteScatterInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index 1ca148c..b6aa5ab 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -355,6 +355,11 @@ enum GenMessageTarget {
> #define GEN75_P1_ATOMIC_COUNTER_4X2 12 //1100: Atomic Counter
> Operation 4X2
> #define GEN75_P1_TYPED_SURFACE_WRITE 13 //1101: Typed Surface
> Write
>
> +#define GEN8_P1_BYTE_GATHER_A64 16 //10000
> +#define GEN8_P1_UNTYPED_READ_A64 17 //10001
> +#define GEN8_P1_UNTYPED_WRITE_A64 25 //11001
> +#define GEN8_P1_BYTE_SCATTER_A64 26 //11010
> +
> /* Data port data cache scratch messages*/
> #define GEN_SCRATCH_READ 0
> #define GEN_SCRATCH_WRITE 1
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index cac29e8..3e80271 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -397,6 +397,14 @@ namespace gbe
> return insn->bits3.ud;
> }
>
> + void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src,
> uint32_t elemNum) {
> + assert(0);
> + }
> +
> + void GenEncoder::UNTYPED_WRITEA64(GenRegister src, uint32_t
> elemNum){
> + assert(0);
> + }
> +
> void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti,
> uint32_t elemNum) {
> GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 79e7b6e..341f431 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -174,6 +174,10 @@ namespace gbe
> virtual void UNTYPED_READ(GenRegister dst, GenRegister src,
> GenRegister bti, uint32_t elemNum);
> /*! Untyped write (upto 4 channels) */
> virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t
> elemNum);
> + /*! Untyped read A64(upto 4 channels) */
> + virtual void UNTYPED_READA64(GenRegister dst, GenRegister src,
> uint32_t elemNum);
> + /*! Untyped write (upto 4 channels) */
> + virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
> /*! Byte gather (for unaligned bytes, shorts and ints) */
> void BYTE_GATHER(GenRegister dst, GenRegister src, GenRegister bti,
> uint32_t elemSize);
> /*! Byte scatter (for unaligned bytes, shorts and ints) */
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index d073770..1ef8f5f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -28,6 +28,8 @@ DECL_GEN7_SCHEDULE(Read64, 80, 1, 1)
> DECL_GEN7_SCHEDULE(Write64, 80, 1, 1)
> DECL_GEN7_SCHEDULE(UntypedRead, 160, 1, 1)
> DECL_GEN7_SCHEDULE(UntypedWrite, 160, 1, 1)
> +DECL_GEN7_SCHEDULE(UntypedReadA64, 160, 1, 1)
> +DECL_GEN7_SCHEDULE(UntypedWriteA64, 160, 1, 1)
> DECL_GEN7_SCHEDULE(ByteGather, 160, 1, 1)
> DECL_GEN7_SCHEDULE(ByteScatter, 160, 1, 1)
> DECL_GEN7_SCHEDULE(DWordGather, 160, 1, 1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 259c3cf..fef0e00 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -182,11 +182,12 @@ namespace gbe
> }
>
> bool SelectionInstruction::isRead(void) const {
> - return this->opcode == SEL_OP_UNTYPED_READ ||
> - this->opcode == SEL_OP_READ64 ||
> - this->opcode == SEL_OP_ATOMIC ||
> - this->opcode == SEL_OP_BYTE_GATHER ||
> - this->opcode == SEL_OP_SAMPLE ||
> + return this->opcode == SEL_OP_UNTYPED_READ ||
> + this->opcode == SEL_OP_UNTYPED_READA64 ||
> + this->opcode == SEL_OP_READ64 ||
> + this->opcode == SEL_OP_ATOMIC ||
> + this->opcode == SEL_OP_BYTE_GATHER ||
> + this->opcode == SEL_OP_SAMPLE ||
> this->opcode == SEL_OP_DWORD_GATHER;
> }
>
> @@ -205,10 +206,11 @@ namespace gbe
> }
>
> bool SelectionInstruction::isWrite(void) const {
> - return this->opcode == SEL_OP_UNTYPED_WRITE ||
> - this->opcode == SEL_OP_WRITE64 ||
> - this->opcode == SEL_OP_ATOMIC ||
> - this->opcode == SEL_OP_BYTE_SCATTER ||
> + return this->opcode == SEL_OP_UNTYPED_WRITE ||
> + this->opcode == SEL_OP_UNTYPED_WRITEA64 ||
> + this->opcode == SEL_OP_WRITE64 ||
> + this->opcode == SEL_OP_ATOMIC ||
> + this->opcode == SEL_OP_BYTE_SCATTER ||
> this->opcode == SEL_OP_TYPED_WRITE;
> }
>
> @@ -633,6 +635,10 @@ namespace gbe
> void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, GenRegister bti,
> vector<GenRegister> temps);
> /*! Byte scatter (for unaligned bytes, shorts and ints) */
> void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, GenRegister bti,
> vector <GenRegister> temps);
> + /*! Untyped read (up to 4 elements) */
> + void UNTYPED_READA64(Reg addr, const GenRegister *dst, uint32_t
> dstNum, uint32_t elemNum);
> + /*! Untyped write (up to 4 elements) */
> + void UNTYPED_WRITEA64(const GenRegister *msgs, uint32_t msgNum,
> + uint32_t elemNum);
> /*! DWord scatter (for constant cache read) */
> void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti);
> /*! Unpack the uint to charN */
> @@ -1391,6 +1397,34 @@ namespace gbe
> srcVector->offsetID = 0;
> srcVector->reg = &insn->src(0);
> }
> + void Selection::Opaque::UNTYPED_READA64(Reg addr,
> + const GenRegister *dst,
> + uint32_t dstNum,
> + uint32_t elemNum) {
> + SelectionInstruction *insn = this-
> >appendInsn(SEL_OP_UNTYPED_READA64, dstNum, 1);
> + SelectionVector *srcVector = this->appendVector();
> + SelectionVector *dstVector = this->appendVector();
> + if (this->isScalarReg(dst[0].reg()))
> + insn->state.noMask = 1;
> + // Regular instruction to encode
> + for (uint32_t id = 0; id < dstNum; ++id)
> + insn->dst(id) = dst[id];
> +
> + insn->src(0) = addr;
> + insn->extra.elem = elemNum;
> +
> + // Sends require contiguous allocation
> + dstVector->regNum = dstNum;
> + dstVector->isSrc = 0;
> + dstVector->offsetID = 0;
> + dstVector->reg = &insn->dst(0);
> +
> + srcVector->regNum = 1;
> + srcVector->isSrc = 1;
> + srcVector->offsetID = 0;
> + srcVector->reg = &insn->src(0);
> + }
>
> void Selection::Opaque::WRITE64(Reg addr,
> const GenRegister *src,
> @@ -1500,6 +1534,25 @@ namespace gbe
> vector->isSrc = 1;
> }
>
> + void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src,
> + uint32_t msgNum,
> + uint32_t elemNum) {
> + SelectionInstruction *insn = this-
> >appendInsn(SEL_OP_UNTYPED_WRITEA64, 0, msgNum);
> + SelectionVector *vector = this->appendVector();
> +
> + // Regular instruction to encode
> + for (uint32_t id = 0; id < msgNum; ++id)
> + insn->src(id) = src[id];
> + insn->extra.elem = elemNum;
> +
> + // Sends require contiguous allocation for the sources
> + vector->regNum = msgNum;
> + vector->reg = &insn->src(0);
> + vector->offsetID = 0;
> + vector->isSrc = 1;
> + }
> +
> void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr,
> uint32_t elemSize,
> GenRegister bti,
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index adbb137..bbccc54 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -55,6 +55,8 @@ DECL_SELECTION_IR(BARRIER, BarrierInstruction)
> DECL_SELECTION_IR(FENCE, FenceInstruction)
> DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
> DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
> +DECL_SELECTION_IR(UNTYPED_READA64, UntypedReadA64Instruction)
> +DECL_SELECTION_IR(UNTYPED_WRITEA64, UntypedWriteA64Instruction)
> DECL_SELECTION_IR(READ64, Read64Instruction)
> DECL_SELECTION_IR(WRITE64, Write64Instruction)
> DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
> --
> 2.3.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list