[Beignet] [PATCH] GBE: Implement instruction compact.
Zhigang Gong
zhigang.gong at linux.intel.com
Sun Apr 20 18:08:36 PDT 2014
This patch LGTM. Good work, Ruiling.
One minor comment: I noticed that you put the following statement in the
newly created file.
> + * Author: Benjamin Segovia <benjamin.segovia at intel.com>
I assume it is a copy/paste error, right? I will correct it for you and
push the patch later. Thanks.
On Tue, Apr 15, 2014 at 04:53:17PM +0800, Ruiling Song wrote:
> A native GEN ASM instruction takes 2*64 bits, but GEN also supports compact instructions,
> which take only 64 bits. To make the code easier to understand, GenInstruction now only
> stands for 64 bits of memory, and GenNativeInstruction & GenCompactInstruction are used
> to represent normal (native) and compact instructions.
>
> After this change, it is not easy to map a SelectionInstruction distance to an ASM distance,
> as the instructions within that distance may be compacted. To avoid introducing too much
> complexity, JMP, IF, ENDIF, NOP will NEVER be compacted.
>
> Some experiments with LuxMark show it could reduce instruction memory by about 20%.
> Unfortunately, no performance improvement was observed.
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/backend/gen/gen_mesa_disasm.c | 20 +-
> backend/src/backend/gen_context.cpp | 41 +-
> backend/src/backend/gen_defs.hpp | 953 +++++++++++++++-------------
> backend/src/backend/gen_encoder.cpp | 116 ++--
> backend/src/backend/gen_encoder.hpp | 11 +-
> backend/src/backend/gen_insn_compact.cpp | 521 +++++++++++++++
> backend/src/backend/gen_insn_selection.cpp | 2 +-
> backend/src/backend/gen_reg_allocation.cpp | 4 +-
> backend/src/backend/gen_register.hpp | 4 +-
> 10 files changed, 1133 insertions(+), 540 deletions(-)
> create mode 100644 backend/src/backend/gen_insn_compact.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index d6f2d3c..9c96f33 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -162,6 +162,7 @@ else (GBE_USE_BLOB)
> backend/gen_program.hpp
> backend/gen_program.h
> backend/gen_defs.hpp
> + backend/gen_insn_compact.cpp
> backend/gen_encoder.hpp
> backend/gen_encoder.cpp)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index e58ef31..871277b 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -533,7 +533,7 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
> return err;
> }
>
> -static int dest (FILE *file, const struct GenInstruction *inst)
> +static int dest (FILE *file, const union GenNativeInstruction *inst)
> {
> int err = 0;
>
> @@ -587,7 +587,7 @@ static int dest (FILE *file, const struct GenInstruction *inst)
> return 0;
> }
>
> -static int dest_3src (FILE *file, const struct GenInstruction *inst)
> +static int dest_3src (FILE *file, const union GenNativeInstruction *inst)
> {
> int err = 0;
> const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE;
> @@ -720,7 +720,7 @@ static int src_da16 (FILE *file,
> return err;
> }
>
> -static int src0_3src (FILE *file, const struct GenInstruction *inst)
> +static int src0_3src (FILE *file, const union GenNativeInstruction *inst)
> {
> int err = 0;
> uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
> @@ -768,7 +768,7 @@ static int src0_3src (FILE *file, const struct GenInstruction *inst)
> return err;
> }
>
> -static int src1_3src (FILE *file, const struct GenInstruction *inst)
> +static int src1_3src (FILE *file, const union GenNativeInstruction *inst)
> {
> int err = 0;
> uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
> @@ -821,7 +821,7 @@ static int src1_3src (FILE *file, const struct GenInstruction *inst)
> }
>
>
> -static int src2_3src (FILE *file, const struct GenInstruction *inst)
> +static int src2_3src (FILE *file, const union GenNativeInstruction *inst)
> {
> int err = 0;
> uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
> @@ -871,7 +871,7 @@ static int src2_3src (FILE *file, const struct GenInstruction *inst)
> return err;
> }
>
> -static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
> +static int imm (FILE *file, uint32_t type, const union GenNativeInstruction *inst) {
> switch (type) {
> case GEN_TYPE_UD:
> format (file, "0x%xUD", inst->bits3.ud);
> @@ -900,7 +900,7 @@ static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
> return 0;
> }
>
> -static int src0 (FILE *file, const struct GenInstruction *inst)
> +static int src0 (FILE *file, const union GenNativeInstruction *inst)
> {
> if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE)
> return imm (file, inst->bits1.da1.src0_reg_type,
> @@ -960,7 +960,7 @@ static int src0 (FILE *file, const struct GenInstruction *inst)
> }
> }
>
> -static int src1 (FILE *file, const struct GenInstruction *inst)
> +static int src1 (FILE *file, const union GenNativeInstruction *inst)
> {
> if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE)
> return imm (file, inst->bits1.da1.src1_reg_type,
> @@ -1029,7 +1029,7 @@ static const int esize[6] = {
> [5] = 32,
> };
>
> -static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
> +static int qtr_ctrl(FILE *file, const union GenNativeInstruction *inst)
> {
> int qtr_ctl = inst->header.quarter_control;
> int exec_size = esize[inst->header.execution_size];
> @@ -1060,7 +1060,7 @@ static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
>
> int gen_disasm (FILE *file, const void *opaque_insn)
> {
> - const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn;
> + const union GenNativeInstruction *inst = (const union GenNativeInstruction *) opaque_insn;
> int err = 0;
> int space = 0;
> int gen = 7;
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 50f10c5..f8292d6 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -39,6 +39,7 @@
>
> namespace gbe
> {
> + extern void decompactInstruction(union GenCompactInstruction *p, union GenNativeInstruction *pOut);
> ///////////////////////////////////////////////////////////////////////////
> // GenContext implementation
> ///////////////////////////////////////////////////////////////////////////
> @@ -88,16 +89,18 @@ namespace gbe
> const LabelIndex label = pair.first;
> const int32_t insnID = pair.second;
> const int32_t targetID = labelPos.find(label)->second;
> - p->patchJMPI(insnID, (targetID - insnID) * 2);
> + p->patchJMPI(insnID, (targetID - insnID));
> }
> for (auto pair : branchPos3) {
> const LabelPair labelPair = pair.first;
> const int32_t insnID = pair.second;
> - const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0;
> - const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1;
> - assert((jip - insnID) * 2 < 32767 && (jip - insnID) * 2 > -32768);
> - assert((uip - insnID) * 2 < 32767 && (uip - insnID) * 2 > -32768);
> - p->patchJMPI(insnID, (((uip - insnID) * 2) << 16) | ((jip - insnID) * 2));
> + // FIXME the 'labelPair' implementation must be fixed, as it is hard to
> + // convert InstructionSelection offset to ASM offset since asm maybe compacted
> + const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0*2;
> + const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1*2;
> + assert((jip - insnID) < 32767 && (jip - insnID) > -32768);
> + assert((uip - insnID) < 32767 && (uip - insnID) > -32768);
> + p->patchJMPI(insnID, (((uip - insnID)) << 16) | ((jip - insnID)));
> }
> }
>
> @@ -975,7 +978,7 @@ namespace gbe
> p->SHL(high, low, tmp);
> p->MOV(low, GenRegister::immud(0));
>
> - p->patchJMPI(jip1, (p->n_instruction() - jip1) * 2);
> + p->patchJMPI(jip1, (p->n_instruction() - jip1) );
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(31)); //update dst where high != 0
> p->curr.predicate = GEN_PREDICATE_NORMAL;
> @@ -989,7 +992,7 @@ namespace gbe
> p->CMP(GEN_CONDITIONAL_EQ, high, GenRegister::immud(0x80000000));
> p->CMP(GEN_CONDITIONAL_EQ, low, GenRegister::immud(0x0));
> p->AND(dst_ud, dst_ud, GenRegister::immud(0xfffffffe));
> - p->patchJMPI(jip0, (p->n_instruction() - jip0) * 2);
> + p->patchJMPI(jip0, (p->n_instruction() - jip0));
>
> p->pop();
>
> @@ -1426,6 +1429,7 @@ namespace gbe
> GenRegister zero = GenRegister::immud(0),
> one = GenRegister::immud(1),
> imm31 = GenRegister::immud(31);
> + uint32_t jip0;
> // (a,b) <- x
> loadTopHalf(a, x);
> loadBottomHalf(b, x);
> @@ -1516,10 +1520,11 @@ namespace gbe
> p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
> else
> NOT_IMPLEMENTED;
> - int jip = -(int)(p->n_instruction() - loop_start + 1) * 2;
> + int distance = -(int)(p->n_instruction() - loop_start );
> p->curr.noMask = 1;
> + jip0 = p->n_instruction();
> p->JMPI(zero);
> - p->patchJMPI(p->n_instruction() - 1, jip + 2);
> + p->patchJMPI(jip0, distance);
> p->pop();
> // end of loop
> }
> @@ -2001,14 +2006,24 @@ namespace gbe
> if (OCL_OUTPUT_ASM) {
> std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
> ir::LabelIndex curLabel = (ir::LabelIndex)0;
> + GenCompactInstruction * pCom = NULL;
> + GenNativeInstruction insn;
> std::cout << " L0:" << std::endl;
> - for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID) {
> + for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
> if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
> std::cout << " L" << curLabel + 1 << ":" << std::endl;
> curLabel = (ir::LabelIndex)(curLabel + 1);
> }
> - std::cout << " (" << std::setw(8) << insnID * 2 << ") ";
> - gen_disasm(stdout, &p->store[insnID]);
> + std::cout << " (" << std::setw(8) << insnID << ") ";
> + pCom = (GenCompactInstruction*)&p->store[insnID];
> + if(pCom->bits1.cmpt_control == 1) {
> + decompactInstruction(pCom, &insn);
> + gen_disasm(stdout, &insn);
> + insnID++;
> + } else {
> + gen_disasm(stdout, &p->store[insnID]);
> + insnID = insnID + 2;
> + }
> }
> std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
> }
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index e731174..4ad1cd1 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -436,475 +436,512 @@ enum GenMessageTarget {
> #define GEN_MAX_GRF 128
>
> /* Instruction format for the execution units */
> -struct GenInstruction
> -{
> - struct {
> - uint32_t opcode:7;
> - uint32_t pad:1;
> - uint32_t access_mode:1;
> - uint32_t mask_control:1;
> - uint32_t dependency_control:2;
> - uint32_t quarter_control:2;
> - uint32_t thread_control:2;
> - uint32_t predicate_control:4;
> - uint32_t predicate_inverse:1;
> - uint32_t execution_size:3;
> - uint32_t destreg_or_condmod:4;
> - uint32_t acc_wr_control:1;
> - uint32_t cmpt_control:1;
> - uint32_t debug_control:1;
> - uint32_t saturate:1;
> - } header;
> -
> - union {
> - struct {
> - uint32_t dest_reg_file:2;
> - uint32_t dest_reg_type:3;
> - uint32_t src0_reg_file:2;
> - uint32_t src0_reg_type:3;
> - uint32_t src1_reg_file:2;
> - uint32_t src1_reg_type:3;
> - uint32_t nib_ctrl:1;
> - uint32_t dest_subreg_nr:5;
> - uint32_t dest_reg_nr:8;
> - uint32_t dest_horiz_stride:2;
> - uint32_t dest_address_mode:1;
> - } da1;
> -
> - struct {
> - uint32_t dest_reg_file:2;
> - uint32_t dest_reg_type:3;
> - uint32_t src0_reg_file:2;
> - uint32_t src0_reg_type:3;
> - uint32_t src1_reg_file:2; /* 0x00000c00 */
> - uint32_t src1_reg_type:3; /* 0x00007000 */
> - uint32_t nib_ctrl:1;
> - int dest_indirect_offset:10; /* offset against the deref'd address reg */
> - uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
> - uint32_t dest_horiz_stride:2;
> - uint32_t dest_address_mode:1;
> - } ia1;
> -
> - struct {
> - uint32_t dest_reg_file:2;
> - uint32_t dest_reg_type:3;
> - uint32_t src0_reg_file:2;
> - uint32_t src0_reg_type:3;
> - uint32_t src1_reg_file:2;
> - uint32_t src1_reg_type:3;
> - uint32_t nib_ctrl:1;
> - uint32_t dest_writemask:4;
> - uint32_t dest_subreg_nr:1;
> - uint32_t dest_reg_nr:8;
> - uint32_t dest_horiz_stride:2;
> - uint32_t dest_address_mode:1;
> - } da16;
>
> - struct {
> - uint32_t dest_reg_file:2;
> - uint32_t dest_reg_type:3;
> - uint32_t src0_reg_file:2;
> - uint32_t src0_reg_type:3;
> - uint32_t nib_ctrl:1;
> - uint32_t dest_writemask:4;
> - int dest_indirect_offset:6;
> - uint32_t dest_subreg_nr:3;
> - uint32_t dest_horiz_stride:2;
> - uint32_t dest_address_mode:1;
> - } ia16;
> +struct GenInstruction {
> + uint32_t low;
> + uint32_t high;
> +};
>
> +union GenCompactInstruction {
> + struct GenInstruction low;
> + struct {
> struct {
> - uint32_t dest_reg_file:2;
> - uint32_t dest_reg_type:3;
> - uint32_t src0_reg_file:2;
> - uint32_t src0_reg_type:3;
> - uint32_t src1_reg_file:2;
> - uint32_t src1_reg_type:3;
> + uint32_t opcode:7;
> + uint32_t debug_control:1;
> + uint32_t control_index:5;
> + uint32_t data_type_index:5;
> + uint32_t sub_reg_index:5;
> + uint32_t acc_wr_control:1;
> + uint32_t destreg_or_condmod:4;
> uint32_t pad:1;
> - int jump_count:16;
> - } branch_gen6;
> -
> + uint32_t cmpt_control:1;
> + uint32_t src0_index_lo:2;
> + } bits1;
> struct {
> - uint32_t dest_reg_file:1;
> - uint32_t flag_subreg_num:1;
> - uint32_t pad0:2;
> - uint32_t src0_abs:1;
> - uint32_t src0_negate:1;
> - uint32_t src1_abs:1;
> - uint32_t src1_negate:1;
> - uint32_t src2_abs:1;
> - uint32_t src2_negate:1;
> - uint32_t pad1:7;
> - uint32_t dest_writemask:4;
> - uint32_t dest_subreg_nr:3;
> + uint32_t src0_index_hi:3;
> + uint32_t src1_index:5;
> uint32_t dest_reg_nr:8;
> - } da3src;
> - } bits1;
> -
> - union {
> - struct {
> - uint32_t src0_subreg_nr:5;
> uint32_t src0_reg_nr:8;
> - uint32_t src0_abs:1;
> - uint32_t src0_negate:1;
> - uint32_t src0_address_mode:1;
> - uint32_t src0_horiz_stride:2;
> - uint32_t src0_width:3;
> - uint32_t src0_vert_stride:4;
> - uint32_t flag_sub_reg_nr:1;
> - uint32_t flag_reg_nr:1;
> - uint32_t pad:5;
> - } da1;
> -
> - struct {
> - int src0_indirect_offset:10;
> - uint32_t src0_subreg_nr:3;
> - uint32_t src0_abs:1;
> - uint32_t src0_negate:1;
> - uint32_t src0_address_mode:1;
> - uint32_t src0_horiz_stride:2;
> - uint32_t src0_width:3;
> - uint32_t src0_vert_stride:4;
> - uint32_t flag_sub_reg_nr:1;
> - uint32_t flag_reg_nr:1;
> - uint32_t pad:5;
> - } ia1;
> -
> - struct {
> - uint32_t src0_swz_x:2;
> - uint32_t src0_swz_y:2;
> - uint32_t src0_subreg_nr:1;
> - uint32_t src0_reg_nr:8;
> - uint32_t src0_abs:1;
> - uint32_t src0_negate:1;
> - uint32_t src0_address_mode:1;
> - uint32_t src0_swz_z:2;
> - uint32_t src0_swz_w:2;
> - uint32_t pad0:1;
> - uint32_t src0_vert_stride:4;
> - uint32_t flag_sub_reg_nr:1;
> - uint32_t flag_reg_nr:1;
> - uint32_t pad:5;
> - } da16;
> -
> - struct {
> - uint32_t src0_swz_x:2;
> - uint32_t src0_swz_y:2;
> - int src0_indirect_offset:6;
> - uint32_t src0_subreg_nr:3;
> - uint32_t src0_abs:1;
> - uint32_t src0_negate:1;
> - uint32_t src0_address_mode:1;
> - uint32_t src0_swz_z:2;
> - uint32_t src0_swz_w:2;
> - uint32_t pad0:1;
> - uint32_t src0_vert_stride:4;
> - uint32_t flag_sub_reg_nr:1;
> - uint32_t flag_reg_nr:1;
> - uint32_t pad:5;
> - } ia16;
> -
> - struct {
> - uint32_t src0_rep_ctrl:1;
> - uint32_t src0_swizzle:8;
> - uint32_t src0_subreg_nr:3;
> - uint32_t src0_reg_nr:8;
> - uint32_t pad0:1;
> - uint32_t src1_rep_ctrl:1;
> - uint32_t src1_swizzle:8;
> - uint32_t src1_subreg_nr_low:2;
> - } da3src;
> - } bits2;
> -
> - union {
> - struct {
> - uint32_t src1_subreg_nr:5;
> uint32_t src1_reg_nr:8;
> - uint32_t src1_abs:1;
> - uint32_t src1_negate:1;
> - uint32_t src1_address_mode:1;
> - uint32_t src1_horiz_stride:2;
> - uint32_t src1_width:3;
> - uint32_t src1_vert_stride:4;
> - uint32_t pad0:7;
> - } da1;
> -
> - struct {
> - uint32_t src1_swz_x:2;
> - uint32_t src1_swz_y:2;
> - uint32_t src1_subreg_nr:1;
> - uint32_t src1_reg_nr:8;
> - uint32_t src1_abs:1;
> - uint32_t src1_negate:1;
> - uint32_t src1_address_mode:1;
> - uint32_t src1_swz_z:2;
> - uint32_t src1_swz_w:2;
> - uint32_t pad1:1;
> - uint32_t src1_vert_stride:4;
> - uint32_t pad2:7;
> - } da16;
> -
> - struct {
> - int src1_indirect_offset:10;
> - uint32_t src1_subreg_nr:3;
> - uint32_t src1_abs:1;
> - uint32_t src1_negate:1;
> - uint32_t src1_address_mode:1;
> - uint32_t src1_horiz_stride:2;
> - uint32_t src1_width:3;
> - uint32_t src1_vert_stride:4;
> - uint32_t pad1:7;
> - } ia1;
> -
> - struct {
> - uint32_t src1_swz_x:2;
> - uint32_t src1_swz_y:2;
> - int src1_indirect_offset:6;
> - uint32_t src1_subreg_nr:3;
> - uint32_t src1_abs:1;
> - uint32_t src1_negate:1;
> - uint32_t pad0:1;
> - uint32_t src1_swz_z:2;
> - uint32_t src1_swz_w:2;
> - uint32_t pad1:1;
> - uint32_t src1_vert_stride:4;
> - uint32_t pad2:7;
> - } ia16;
> -
> - struct {
> - uint32_t function_control:19;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad1:2;
> - uint32_t end_of_thread:1;
> - } generic_gen5;
> -
> - struct {
> - uint32_t sub_function_id:3;
> - uint32_t pad0:11;
> - uint32_t ack_req:1;
> - uint32_t notify:2;
> - uint32_t pad1:2;
> - uint32_t header:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } msg_gateway;
> -
> - struct {
> - uint32_t opcode:1;
> - uint32_t request:1;
> - uint32_t pad0:2;
> - uint32_t resource:1;
> - uint32_t pad1:14;
> - uint32_t header:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } spawner_gen5;
> -
> - /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
> - struct {
> - uint32_t function:4;
> - uint32_t int_type:1;
> - uint32_t precision:1;
> - uint32_t saturate:1;
> - uint32_t data_type:1;
> - uint32_t snapshot:1;
> - uint32_t pad0:10;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad1:2;
> - uint32_t end_of_thread:1;
> - } math_gen5;
> + } bits2;
> + };
> +};
>
> +union GenNativeInstruction
> +{
> + struct {
> + struct GenInstruction low;
> + struct GenInstruction high;
> + };
> + struct {
> struct {
> - uint32_t bti:8;
> - uint32_t sampler:4;
> - uint32_t msg_type:5;
> - uint32_t simd_mode:2;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad1:2;
> - uint32_t end_of_thread:1;
> - } sampler_gen7;
> -
> - /**
> - * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> - *
> - * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
> - **/
> - struct {
> - uint32_t bti:8;
> - uint32_t msg_control:5;
> - uint32_t msg_type:3;
> - uint32_t pad0:3;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad1:2;
> - uint32_t end_of_thread:1;
> - } gen6_dp_sampler_const_cache;
> -
> - /*! Data port untyped read / write messages */
> - struct {
> - uint32_t bti:8;
> - uint32_t rgba:4;
> - uint32_t simd_mode:2;
> - uint32_t msg_type:4;
> - uint32_t category:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } gen7_untyped_rw;
> -
> - /*! Data port byte scatter / gather */
> - struct {
> - uint32_t bti:8;
> - uint32_t simd_mode:1;
> - uint32_t ignored0:1;
> - uint32_t data_size:2;
> - uint32_t ignored1:2;
> - uint32_t msg_type:4;
> - uint32_t category:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } gen7_byte_rw;
> -
> - /*! Data port Scratch Read/ write */
> - struct {
> - uint32_t offset:12;
> - uint32_t block_size:2;
> - uint32_t ignored0:1;
> - uint32_t invalidate_after_read:1;
> - uint32_t channel_mode:1;
> - uint32_t msg_type:1;
> - uint32_t category:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } gen7_scratch_rw;
> -
> - /*! Data port OBlock read / write */
> - struct {
> - uint32_t bti:8;
> - uint32_t block_size:3;
> - uint32_t ignored:2;
> - uint32_t invalidate_after_read:1;
> - uint32_t msg_type:4;
> - uint32_t category:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } gen7_oblock_rw;
> -
> - /*! Data port dword scatter / gather */
> - struct {
> - uint32_t bti:8;
> - uint32_t block_size:2;
> - uint32_t ignored0:3;
> - uint32_t invalidate_after_read:1;
> - uint32_t msg_type:4;
> - uint32_t ignored1:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad2:2;
> - uint32_t end_of_thread:1;
> - } gen7_dword_rw;
> -
> - /*! Data port typed read / write messages */
> - struct {
> - uint32_t bti:8;
> - uint32_t chan_mask:4;
> + uint32_t opcode:7;
> uint32_t pad:1;
> - uint32_t slot:1;
> - uint32_t msg_type:4;
> - uint32_t pad2:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad3:2;
> - uint32_t end_of_thread:1;
> - } gen7_typed_rw;
> -
> - /*! Memory fence */
> - struct {
> - uint32_t bti:8;
> - uint32_t pad:5;
> - uint32_t commit_enable:1;
> - uint32_t msg_type:4;
> - uint32_t pad2:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad3:2;
> - uint32_t end_of_thread:1;
> - } gen7_memory_fence;
> -
> - /*! atomic messages */
> - struct {
> - uint32_t bti:8;
> - uint32_t aop_type:4;
> - uint32_t simd_mode:1;
> - uint32_t return_data:1;
> - uint32_t msg_type:4;
> - uint32_t category:1;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad3:2;
> - uint32_t end_of_thread:1;
> - } gen7_atomic_op;
> -
> - struct {
> - uint32_t src1_subreg_nr_high:1;
> - uint32_t src1_reg_nr:8;
> - uint32_t pad0:1;
> - uint32_t src2_rep_ctrl:1;
> - uint32_t src2_swizzle:8;
> - uint32_t src2_subreg_nr:3;
> - uint32_t src2_reg_nr:8;
> - uint32_t pad1:2;
> - } da3src;
> -
> - /*! Message gateway */
> - struct {
> - uint32_t subfunc:3;
> - uint32_t pad:11;
> - uint32_t ackreq:1;
> - uint32_t notify:2;
> - uint32_t pad2:2;
> - uint32_t header_present:1;
> - uint32_t response_length:5;
> - uint32_t msg_length:4;
> - uint32_t pad3:2;
> - uint32_t end_of_thread:1;
> - } gen7_msg_gw;
> -
> - struct {
> - uint32_t jip:16;
> - uint32_t uip:16;
> - } gen7_branch;
> -
> - int d;
> - uint32_t ud;
> - float f;
> - } bits3;
> + uint32_t access_mode:1;
> + uint32_t mask_control:1;
> + uint32_t dependency_control:2;
> + uint32_t quarter_control:2;
> + uint32_t thread_control:2;
> + uint32_t predicate_control:4;
> + uint32_t predicate_inverse:1;
> + uint32_t execution_size:3;
> + uint32_t destreg_or_condmod:4;
> + uint32_t acc_wr_control:1;
> + uint32_t cmpt_control:1;
> + uint32_t debug_control:1;
> + uint32_t saturate:1;
> + } header;
> +
> + union {
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t src1_reg_file:2;
> + uint32_t src1_reg_type:3;
> + uint32_t nib_ctrl:1;
> + uint32_t dest_subreg_nr:5;
> + uint32_t dest_reg_nr:8;
> + uint32_t dest_horiz_stride:2;
> + uint32_t dest_address_mode:1;
> + } da1;
> +
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t src1_reg_file:2; /* 0x00000c00 */
> + uint32_t src1_reg_type:3; /* 0x00007000 */
> + uint32_t nib_ctrl:1;
> + int dest_indirect_offset:10; /* offset against the deref'd address reg */
> + uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
> + uint32_t dest_horiz_stride:2;
> + uint32_t dest_address_mode:1;
> + } ia1;
> +
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t src1_reg_file:2;
> + uint32_t src1_reg_type:3;
> + uint32_t nib_ctrl:1;
> + uint32_t dest_writemask:4;
> + uint32_t dest_subreg_nr:1;
> + uint32_t dest_reg_nr:8;
> + uint32_t dest_horiz_stride:2;
> + uint32_t dest_address_mode:1;
> + } da16;
> +
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t nib_ctrl:1;
> + uint32_t dest_writemask:4;
> + int dest_indirect_offset:6;
> + uint32_t dest_subreg_nr:3;
> + uint32_t dest_horiz_stride:2;
> + uint32_t dest_address_mode:1;
> + } ia16;
> +
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t src1_reg_file:2;
> + uint32_t src1_reg_type:3;
> + uint32_t pad:1;
> + int jump_count:16;
> + } branch_gen6;
> +
> + struct {
> + uint32_t dest_reg_file:1;
> + uint32_t flag_subreg_num:1;
> + uint32_t pad0:2;
> + uint32_t src0_abs:1;
> + uint32_t src0_negate:1;
> + uint32_t src1_abs:1;
> + uint32_t src1_negate:1;
> + uint32_t src2_abs:1;
> + uint32_t src2_negate:1;
> + uint32_t pad1:7;
> + uint32_t dest_writemask:4;
> + uint32_t dest_subreg_nr:3;
> + uint32_t dest_reg_nr:8;
> + } da3src;
> + } bits1;
> +
> + union {
> + struct {
> + uint32_t src0_subreg_nr:5;
> + uint32_t src0_reg_nr:8;
> + uint32_t src0_abs:1;
> + uint32_t src0_negate:1;
> + uint32_t src0_address_mode:1;
> + uint32_t src0_horiz_stride:2;
> + uint32_t src0_width:3;
> + uint32_t src0_vert_stride:4;
> + uint32_t flag_sub_reg_nr:1;
> + uint32_t flag_reg_nr:1;
> + uint32_t pad:5;
> + } da1;
> +
> + struct {
> + int src0_indirect_offset:10;
> + uint32_t src0_subreg_nr:3;
> + uint32_t src0_abs:1;
> + uint32_t src0_negate:1;
> + uint32_t src0_address_mode:1;
> + uint32_t src0_horiz_stride:2;
> + uint32_t src0_width:3;
> + uint32_t src0_vert_stride:4;
> + uint32_t flag_sub_reg_nr:1;
> + uint32_t flag_reg_nr:1;
> + uint32_t pad:5;
> + } ia1;
> +
> + struct {
> + uint32_t src0_swz_x:2;
> + uint32_t src0_swz_y:2;
> + uint32_t src0_subreg_nr:1;
> + uint32_t src0_reg_nr:8;
> + uint32_t src0_abs:1;
> + uint32_t src0_negate:1;
> + uint32_t src0_address_mode:1;
> + uint32_t src0_swz_z:2;
> + uint32_t src0_swz_w:2;
> + uint32_t pad0:1;
> + uint32_t src0_vert_stride:4;
> + uint32_t flag_sub_reg_nr:1;
> + uint32_t flag_reg_nr:1;
> + uint32_t pad:5;
> + } da16;
> +
> + struct {
> + uint32_t src0_swz_x:2;
> + uint32_t src0_swz_y:2;
> + int src0_indirect_offset:6;
> + uint32_t src0_subreg_nr:3;
> + uint32_t src0_abs:1;
> + uint32_t src0_negate:1;
> + uint32_t src0_address_mode:1;
> + uint32_t src0_swz_z:2;
> + uint32_t src0_swz_w:2;
> + uint32_t pad0:1;
> + uint32_t src0_vert_stride:4;
> + uint32_t flag_sub_reg_nr:1;
> + uint32_t flag_reg_nr:1;
> + uint32_t pad:5;
> + } ia16;
> +
> + struct {
> + uint32_t src0_rep_ctrl:1;
> + uint32_t src0_swizzle:8;
> + uint32_t src0_subreg_nr:3;
> + uint32_t src0_reg_nr:8;
> + uint32_t pad0:1;
> + uint32_t src1_rep_ctrl:1;
> + uint32_t src1_swizzle:8;
> + uint32_t src1_subreg_nr_low:2;
> + } da3src;
> + } bits2;
> +
> + union {
> + struct {
> + uint32_t src1_subreg_nr:5;
> + uint32_t src1_reg_nr:8;
> + uint32_t src1_abs:1;
> + uint32_t src1_negate:1;
> + uint32_t src1_address_mode:1;
> + uint32_t src1_horiz_stride:2;
> + uint32_t src1_width:3;
> + uint32_t src1_vert_stride:4;
> + uint32_t pad0:7;
> + } da1;
> +
> + struct {
> + uint32_t src1_swz_x:2;
> + uint32_t src1_swz_y:2;
> + uint32_t src1_subreg_nr:1;
> + uint32_t src1_reg_nr:8;
> + uint32_t src1_abs:1;
> + uint32_t src1_negate:1;
> + uint32_t src1_address_mode:1;
> + uint32_t src1_swz_z:2;
> + uint32_t src1_swz_w:2;
> + uint32_t pad1:1;
> + uint32_t src1_vert_stride:4;
> + uint32_t pad2:7;
> + } da16;
> +
> + struct {
> + int src1_indirect_offset:10;
> + uint32_t src1_subreg_nr:3;
> + uint32_t src1_abs:1;
> + uint32_t src1_negate:1;
> + uint32_t src1_address_mode:1;
> + uint32_t src1_horiz_stride:2;
> + uint32_t src1_width:3;
> + uint32_t src1_vert_stride:4;
> + uint32_t pad1:7;
> + } ia1;
> +
> + struct {
> + uint32_t src1_swz_x:2;
> + uint32_t src1_swz_y:2;
> + int src1_indirect_offset:6;
> + uint32_t src1_subreg_nr:3;
> + uint32_t src1_abs:1;
> + uint32_t src1_negate:1;
> + uint32_t pad0:1;
> + uint32_t src1_swz_z:2;
> + uint32_t src1_swz_w:2;
> + uint32_t pad1:1;
> + uint32_t src1_vert_stride:4;
> + uint32_t pad2:7;
> + } ia16;
> +
> + struct {
> + uint32_t function_control:19;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } generic_gen5;
> +
> + struct {
> + uint32_t sub_function_id:3;
> + uint32_t pad0:11;
> + uint32_t ack_req:1;
> + uint32_t notify:2;
> + uint32_t pad1:2;
> + uint32_t header:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } msg_gateway;
> +
> + struct {
> + uint32_t opcode:1;
> + uint32_t request:1;
> + uint32_t pad0:2;
> + uint32_t resource:1;
> + uint32_t pad1:14;
> + uint32_t header:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } spawner_gen5;
> +
> + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
> + struct {
> + uint32_t function:4;
> + uint32_t int_type:1;
> + uint32_t precision:1;
> + uint32_t saturate:1;
> + uint32_t data_type:1;
> + uint32_t snapshot:1;
> + uint32_t pad0:10;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } math_gen5;
> +
> + struct {
> + uint32_t bti:8;
> + uint32_t sampler:4;
> + uint32_t msg_type:5;
> + uint32_t simd_mode:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } sampler_gen7;
> +
> + /**
> + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> + *
> + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
> + **/
> + struct {
> + uint32_t bti:8;
> + uint32_t msg_control:5;
> + uint32_t msg_type:3;
> + uint32_t pad0:3;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad1:2;
> + uint32_t end_of_thread:1;
> + } gen6_dp_sampler_const_cache;
> +
> + /*! Data port untyped read / write messages */
> + struct {
> + uint32_t bti:8;
> + uint32_t rgba:4;
> + uint32_t simd_mode:2;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_untyped_rw;
> +
> + /*! Data port byte scatter / gather */
> + struct {
> + uint32_t bti:8;
> + uint32_t simd_mode:1;
> + uint32_t ignored0:1;
> + uint32_t data_size:2;
> + uint32_t ignored1:2;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_byte_rw;
> +
> + /*! Data port Scratch Read/ write */
> + struct {
> + uint32_t offset:12;
> + uint32_t block_size:2;
> + uint32_t ignored0:1;
> + uint32_t invalidate_after_read:1;
> + uint32_t channel_mode:1;
> + uint32_t msg_type:1;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_scratch_rw;
> +
> + /*! Data port OBlock read / write */
> + struct {
> + uint32_t bti:8;
> + uint32_t block_size:3;
> + uint32_t ignored:2;
> + uint32_t invalidate_after_read:1;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_oblock_rw;
> +
> + /*! Data port dword scatter / gather */
> + struct {
> + uint32_t bti:8;
> + uint32_t block_size:2;
> + uint32_t ignored0:3;
> + uint32_t invalidate_after_read:1;
> + uint32_t msg_type:4;
> + uint32_t ignored1:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad2:2;
> + uint32_t end_of_thread:1;
> + } gen7_dword_rw;
> +
> + /*! Data port typed read / write messages */
> + struct {
> + uint32_t bti:8;
> + uint32_t chan_mask:4;
> + uint32_t pad:1;
> + uint32_t slot:1;
> + uint32_t msg_type:4;
> + uint32_t pad2:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen7_typed_rw;
> +
> + /*! Memory fence */
> + struct {
> + uint32_t bti:8;
> + uint32_t pad:5;
> + uint32_t commit_enable:1;
> + uint32_t msg_type:4;
> + uint32_t pad2:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen7_memory_fence;
> +
> + /*! atomic messages */
> + struct {
> + uint32_t bti:8;
> + uint32_t aop_type:4;
> + uint32_t simd_mode:1;
> + uint32_t return_data:1;
> + uint32_t msg_type:4;
> + uint32_t category:1;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen7_atomic_op;
> +
> + struct {
> + uint32_t src1_subreg_nr_high:1;
> + uint32_t src1_reg_nr:8;
> + uint32_t pad0:1;
> + uint32_t src2_rep_ctrl:1;
> + uint32_t src2_swizzle:8;
> + uint32_t src2_subreg_nr:3;
> + uint32_t src2_reg_nr:8;
> + uint32_t pad1:2;
> + } da3src;
> +
> + /*! Message gateway */
> + struct {
> + uint32_t subfunc:3;
> + uint32_t pad:11;
> + uint32_t ackreq:1;
> + uint32_t notify:2;
> + uint32_t pad2:2;
> + uint32_t header_present:1;
> + uint32_t response_length:5;
> + uint32_t msg_length:4;
> + uint32_t pad3:2;
> + uint32_t end_of_thread:1;
> + } gen7_msg_gw;
> +
> + struct {
> + uint32_t jip:16;
> + uint32_t uip:16;
> + } gen7_branch;
> +
> + int d;
> + uint32_t ud;
> + float f;
> + } bits3;
> + };
> };
>
> #endif /* __GEN_DEFS_HPP__ */
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index 9df031e..8b5057e 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -51,8 +51,11 @@
> #include "backend/gen_encoder.hpp"
> #include <cstring>
>
> +
> namespace gbe
> {
> + extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split);
> + extern bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split);
> //////////////////////////////////////////////////////////////////////////
> // Some helper functions to encode
> //////////////////////////////////////////////////////////////////////////
> @@ -91,7 +94,7 @@ namespace gbe
> }
>
> static void setMessageDescriptor(GenEncoder *p,
> - GenInstruction *inst,
> + GenNativeInstruction *inst,
> enum GenMessageTarget sfid,
> unsigned msg_length,
> unsigned response_length,
> @@ -107,7 +110,7 @@ namespace gbe
> }
>
> static void setDPUntypedRW(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> uint32_t bti,
> uint32_t rgba,
> uint32_t msg_type,
> @@ -128,7 +131,7 @@ namespace gbe
> }
>
> static void setDPByteScatterGather(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> uint32_t bti,
> uint32_t elem_size,
> uint32_t msg_type,
> @@ -149,7 +152,7 @@ namespace gbe
> }
> #if 0
> static void setOBlockRW(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> uint32_t bti,
> uint32_t size,
> uint32_t msg_type,
> @@ -167,7 +170,7 @@ namespace gbe
> #endif
>
> static void setSamplerMessage(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> unsigned char bti,
> unsigned char sampler,
> uint32_t msg_type,
> @@ -187,7 +190,7 @@ namespace gbe
>
>
> static void setTypedWriteMessage(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> unsigned char bti,
> unsigned char msg_type,
> uint32_t msg_length,
> @@ -199,7 +202,7 @@ namespace gbe
> insn->bits3.gen7_typed_rw.msg_type = msg_type;
> }
> static void setDWordScatterMessgae(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> uint32_t bti,
> uint32_t block_size,
> uint32_t msg_type,
> @@ -238,7 +241,7 @@ namespace gbe
> curr = stack[--stateNum];
> }
>
> - void GenEncoder::setHeader(GenInstruction *insn) {
> + void GenEncoder::setHeader(GenNativeInstruction *insn) {
> if (this->curr.execWidth == 8)
> insn->header.execution_size = GEN_WIDTH_8;
> else if (this->curr.execWidth == 16)
> @@ -260,7 +263,7 @@ namespace gbe
> insn->header.saturate = this->curr.saturate;
> }
>
> - void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) {
> + void GenEncoder::setDst(GenNativeInstruction *insn, GenRegister dest) {
> if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
> assert(dest.nr < 128);
>
> @@ -274,7 +277,7 @@ namespace gbe
> insn->bits1.da1.dest_horiz_stride = dest.hstride;
> }
>
> - void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) {
> + void GenEncoder::setSrc0(GenNativeInstruction *insn, GenRegister reg) {
> if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
> assert(reg.nr < 128);
>
> @@ -327,7 +330,7 @@ namespace gbe
> }
> }
>
> - void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
> + void GenEncoder::setSrc1(GenNativeInstruction *insn, GenRegister reg) {
> assert(reg.nr < 128);
> assert(reg.file != GEN_ARCHITECTURE_REGISTER_FILE || reg.nr == 0);
>
> @@ -442,7 +445,7 @@ namespace gbe
> }
>
> void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> @@ -469,7 +472,7 @@ namespace gbe
> }
>
> void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> assert(elemNum >= 1 || elemNum <= 4);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> @@ -495,7 +498,7 @@ namespace gbe
> }
>
> void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> if (this->curr.execWidth == 8) {
> @@ -521,7 +524,7 @@ namespace gbe
> }
>
> void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> this->setHeader(insn);
> @@ -545,7 +548,7 @@ namespace gbe
> }
>
> void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
> uint32_t block_size = 0;
> @@ -575,7 +578,7 @@ namespace gbe
> }
>
> void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> uint32_t msg_length = 0;
> uint32_t response_length = 0;
>
> @@ -608,13 +611,21 @@ namespace gbe
> NOT_SUPPORTED;
>
> }
> + GenCompactInstruction *GenEncoder::nextCompact(uint32_t opcode) {
> + GenCompactInstruction insn;
> + std::memset(&insn, 0, sizeof(GenCompactInstruction));
> + insn.bits1.opcode = opcode;
> + this->store.push_back(insn.low);
> + return (GenCompactInstruction *)&this->store.back();
> + }
>
> - GenInstruction *GenEncoder::next(uint32_t opcode) {
> - GenInstruction insn;
> - std::memset(&insn, 0, sizeof(GenInstruction));
> + GenNativeInstruction *GenEncoder::next(uint32_t opcode) {
> + GenNativeInstruction insn;
> + std::memset(&insn, 0, sizeof(GenNativeInstruction));
> insn.header.opcode = opcode;
> - this->store.push_back(insn);
> - return &this->store.back();
> + this->store.push_back(insn.low);
> + this->store.push_back(insn.high);
> + return (GenNativeInstruction *)(&this->store.back()-1);
> }
>
> INLINE void _handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
> @@ -622,7 +633,7 @@ namespace gbe
> int w = p->curr.execWidth;
> p->push();
> p->curr.nibControl = 0;
> - GenInstruction *insn = p->next(opcode);
> + GenNativeInstruction *insn = p->next(opcode);
> p->setHeader(insn);
> p->setDst(insn, dst);
> p->setSrc0(insn, src0);
> @@ -678,7 +689,9 @@ namespace gbe
> }
> p->pop();
> } else if (needToSplitAlu1(p, dst, src) == false) {
> - GenInstruction *insn = p->next(opcode);
> + if(compactAlu1(p, opcode, dst, src, condition, false))
> + return;
> + GenNativeInstruction *insn = p->next(opcode);
> if (condition != 0) {
> GBE_ASSERT(opcode == GEN_OPCODE_MOV ||
> opcode == GEN_OPCODE_NOT);
> @@ -688,7 +701,7 @@ namespace gbe
> p->setDst(insn, dst);
> p->setSrc0(insn, src);
> } else {
> - GenInstruction *insnQ1, *insnQ2;
> + GenNativeInstruction *insnQ1, *insnQ2;
>
> // Instruction for the first quarter
> insnQ1 = p->next(opcode);
> @@ -718,7 +731,9 @@ namespace gbe
> if (dst.isdf() && src0.isdf() && src1.isdf()) {
> handleDouble(p, opcode, dst, src0, src1);
> } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
> - GenInstruction *insn = p->next(opcode);
> + if(compactAlu2(p, opcode, dst, src0, src1, condition, false))
> + return;
> + GenNativeInstruction *insn = p->next(opcode);
> if (condition != 0) {
> GBE_ASSERT(opcode == GEN_OPCODE_OR ||
> opcode == GEN_OPCODE_XOR ||
> @@ -730,7 +745,7 @@ namespace gbe
> p->setSrc0(insn, src0);
> p->setSrc1(insn, src1);
> } else {
> - GenInstruction *insnQ1, *insnQ2;
> + GenNativeInstruction *insnQ1, *insnQ2;
>
> // Instruction for the first quarter
> insnQ1 = p->next(opcode);
> @@ -754,14 +769,14 @@ namespace gbe
>
> #define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6))
>
> - static GenInstruction *alu3(GenEncoder *p,
> + static GenNativeInstruction *alu3(GenEncoder *p,
> uint32_t opcode,
> GenRegister dest,
> GenRegister src0,
> GenRegister src1,
> GenRegister src2)
> {
> - GenInstruction *insn = p->next(opcode);
> + GenNativeInstruction *insn = p->next(opcode);
>
> assert(dest.file == GEN_GENERAL_REGISTER_FILE);
> assert(dest.nr < 128);
> @@ -811,7 +826,7 @@ namespace gbe
>
> // Emit second half of the instruction
> if (p->curr.execWidth == 16) {
> - GenInstruction q1Insn = *insn;
> + GenNativeInstruction q1Insn = *insn;
> insn = p->next(opcode);
> *insn = q1Insn;
> insn->header.quarter_control = GEN_COMPRESSION_Q2;
> @@ -1048,14 +1063,14 @@ namespace gbe
>
>
> void GenEncoder::NOP(void) {
> - GenInstruction *insn = this->next(GEN_OPCODE_NOP);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_NOP);
> this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
> this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
> this->setSrc1(insn, GenRegister::immud(0x0));
> }
>
> void GenEncoder::BARRIER(GenRegister src) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setHeader(insn);
> this->setDst(insn, GenRegister::null());
> this->setSrc0(insn, src);
> @@ -1064,7 +1079,7 @@ namespace gbe
> insn->bits3.msg_gateway.notify = 0x1;
> }
> void GenEncoder::FENCE(GenRegister dst) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setHeader(insn);
> this->setDst(insn, dst);
> this->setSrc0(insn, dst);
> @@ -1090,7 +1105,7 @@ namespace gbe
> ALU2_BRA(BRC)
>
> void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
> - GenInstruction &insn = this->store[insnID];
> + GenNativeInstruction &insn = *(GenNativeInstruction *)&this->store[insnID];
> GBE_ASSERT(insnID < this->store.size());
> GBE_ASSERT(insn.header.opcode == GEN_OPCODE_JMPI ||
> insn.header.opcode == GEN_OPCODE_BRD ||
> @@ -1118,7 +1133,7 @@ namespace gbe
> // for all the branching instruction. And need to adjust the distance
> // for those branch instruction's start point and end point contains
> // this instruction.
> - GenInstruction &insn2 = this->store[insnID+1];
> + GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
> GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
> insn.header.opcode = GEN_OPCODE_ADD;
> this->setDst(&insn, GenRegister::ip());
> @@ -1127,7 +1142,7 @@ namespace gbe
> } else {
> insn.header.predicate_inverse ^= 1;
> this->setSrc1(&insn, GenRegister::immd(2));
> - GenInstruction &insn2 = this->store[insnID+1];
> + GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
> GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
> GBE_ASSERT(insnID < this->store.size());
> insn2.header.predicate_control = GEN_PREDICATE_NONE;
> @@ -1140,7 +1155,10 @@ namespace gbe
>
> void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
> if (needToSplitCmp(this, src0, src1) == false) {
> - GenInstruction *insn = this->next(GEN_OPCODE_CMP);
> + if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
> + return;
> + }
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
> this->setHeader(insn);
> insn->header.destreg_or_condmod = conditional;
> insn->header.thread_control = GEN_THREAD_SWITCH;
> @@ -1148,7 +1166,7 @@ namespace gbe
> this->setSrc0(insn, src0);
> this->setSrc1(insn, src1);
> } else {
> - GenInstruction *insnQ1, *insnQ2;
> + GenNativeInstruction *insnQ1, *insnQ2;
>
> // Instruction for the first quarter
> insnQ1 = this->next(GEN_OPCODE_CMP);
> @@ -1177,7 +1195,7 @@ namespace gbe
> GenRegister src0,
> GenRegister src1)
> {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEL);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEL);
> GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
> this->setHeader(insn);
> insn->header.destreg_or_condmod = conditional;
> @@ -1187,7 +1205,7 @@ namespace gbe
> }
>
> void GenEncoder::WAIT(void) {
> - GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
> GenRegister src = GenRegister::notification1();
> this->setDst(insn, GenRegister::null());
> this->setSrc0(insn, src);
> @@ -1198,7 +1216,7 @@ namespace gbe
> }
>
> void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) {
> - GenInstruction *insn = this->next(GEN_OPCODE_MATH);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
> assert(dst.file == GEN_GENERAL_REGISTER_FILE);
> assert(src0.file == GEN_GENERAL_REGISTER_FILE);
> assert(src1.file == GEN_GENERAL_REGISTER_FILE);
> @@ -1226,7 +1244,7 @@ namespace gbe
> insn->header.quarter_control = GEN_COMPRESSION_Q1;
>
> if(this->curr.execWidth == 16) {
> - GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
> + GenNativeInstruction *insn2 = this->next(GEN_OPCODE_MATH);
> GenRegister new_dest, new_src0, new_src1;
> new_dest = GenRegister::QnPhysical(dst, 1);
> new_src0 = GenRegister::QnPhysical(src0, 1);
> @@ -1244,7 +1262,7 @@ namespace gbe
> }
>
> void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
> - GenInstruction *insn = this->next(GEN_OPCODE_MATH);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
> assert(dst.file == GEN_GENERAL_REGISTER_FILE);
> assert(src.file == GEN_GENERAL_REGISTER_FILE);
> assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
> @@ -1275,7 +1293,7 @@ namespace gbe
> msg_length++;
> uint32_t simd_mode = (simdWidth == 16) ?
> GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setHeader(insn);
> this->setDst(insn, dest);
> this->setSrc0(insn, msg);
> @@ -1287,7 +1305,7 @@ namespace gbe
>
> void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
> {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> uint32_t msg_type = GEN_TYPED_WRITE;
> uint32_t msg_length = header_present ? 9 : 8;
> this->setHeader(insn);
> @@ -1296,7 +1314,7 @@ namespace gbe
> setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
> }
> static void setScratchMessage(GenEncoder *p,
> - GenInstruction *insn,
> + GenNativeInstruction *insn,
> uint32_t offset,
> uint32_t block_size,
> uint32_t channel_mode,
> @@ -1317,7 +1335,7 @@ namespace gbe
> {
> assert(src_num == 1 || src_num ==2);
> uint32_t block_size = src_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setHeader(insn);
> this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
> this->setSrc0(insn, msg);
> @@ -1330,7 +1348,7 @@ namespace gbe
> {
> assert(dst_num == 1 || dst_num ==2);
> uint32_t block_size = dst_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setHeader(insn);
> this->setDst(insn, dst);
> this->setSrc0(insn, src);
> @@ -1340,7 +1358,7 @@ namespace gbe
> }
>
> void GenEncoder::EOT(uint32_t msg) {
> - GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
> this->setSrc0(insn, GenRegister::ud8grf(msg,0));
> this->setSrc1(insn, GenRegister::immud(0));
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 50662fb..4c65a9c 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -202,11 +202,12 @@ namespace gbe
> ////////////////////////////////////////////////////////////////////////
> // Helper functions to encode
> ////////////////////////////////////////////////////////////////////////
> - void setHeader(GenInstruction *insn);
> - void setDst(GenInstruction *insn, GenRegister dest);
> - void setSrc0(GenInstruction *insn, GenRegister reg);
> - void setSrc1(GenInstruction *insn, GenRegister reg);
> - GenInstruction *next(uint32_t opcode);
> + void setHeader(GenNativeInstruction *insn);
> + void setDst(GenNativeInstruction *insn, GenRegister dest);
> + void setSrc0(GenNativeInstruction *insn, GenRegister reg);
> + void setSrc1(GenNativeInstruction *insn, GenRegister reg);
> + GenCompactInstruction *nextCompact(uint32_t opcode);
> + GenNativeInstruction *next(uint32_t opcode);
> uint32_t n_instruction(void) const { return store.size(); }
> GBE_CLASS(GenEncoder); //!< Use custom allocators
> };
> diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
> new file mode 100644
> index 0000000..5b0a897
> --- /dev/null
> +++ b/backend/src/backend/gen_insn_compact.cpp
> @@ -0,0 +1,521 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Benjamin Segovia <benjamin.segovia at intel.com>
> + */
> +#include "backend/gen_defs.hpp"
> +#include "backend/gen_encoder.hpp"
> +#include <cstring>
> +
> +namespace gbe {
> +
> + struct compact_table_entry {
> + uint32_t bit_pattern;
> + uint32_t index;
> + };
> +
> + static compact_table_entry control_table[] = {
> + {0b0000000000000000010, 0},
> + {0b0000100000000000000, 1},
> + {0b0000100000000000001, 2},
> + {0b0000100000000000010, 3},
> + {0b0000100000000000011, 4},
> + {0b0000100000000000100, 5},
> + {0b0000100000000000101, 6},
> + {0b0000100000000000111, 7},
> + {0b0000100000000001000, 8},
> + {0b0000100000000001001, 9},
> + {0b0000100000000001101, 10},
> + {0b0000110000000000000, 11},
> + {0b0000110000000000001, 12},
> + {0b0000110000000000010, 13},
> + {0b0000110000000000011, 14},
> + {0b0000110000000000100, 15},
> + {0b0000110000000000101, 16},
> + {0b0000110000000000111, 17},
> + {0b0000110000000001001, 18},
> + {0b0000110000000001101, 19},
> + {0b0000110000000010000, 20},
> + {0b0000110000100000000, 21},
> + {0b0001000000000000000, 22},
> + {0b0001000000000000010, 23},
> + {0b0001000000000000100, 24},
> + {0b0001000000100000000, 25},
> + {0b0010110000000000000, 26},
> + {0b0010110000000010000, 27},
> + {0b0011000000000000000, 28},
> + {0b0011000000100000000, 29},
> + {0b0101000000000000000, 30},
> + {0b0101000000100000000, 31},
> + };
> +
> + static compact_table_entry data_type_table[] = {
> + {0b000000001000001100, 20},
> + {0b001000000000000001, 0},
> + {0b001000000000100000, 1},
> + {0b001000000000100001, 2},
> + {0b001000000000111101, 21},
> + {0b001000000001100001, 3},
> + {0b001000000010100101, 22},
> + {0b001000000010111101, 4},
> + {0b001000001011111101, 5},
> + {0b001000001110100001, 6},
> + {0b001000001110100101, 7},
> + {0b001000001110111101, 8},
> + {0b001000010000100000, 23},
> + {0b001000010000100001, 9},
> + {0b001000110000100000, 10},
> + {0b001000110000100001, 11},
> + {0b001001010010100100, 24},
> + {0b001001010010100101, 12},
> + {0b001001110010000100, 25},
> + {0b001001110010100100, 13},
> + {0b001001110010100101, 14},
> + {0b001010010100001001, 26},
> + {0b001010010100101000, 30},
> + {0b001010110100101000, 31},
> + {0b001011110110101100, 29},
> + {0b001101111110111101, 27},
> + {0b001111001110111101, 15},
> + {0b001111011110011101, 16},
> + {0b001111011110111100, 17},
> + {0b001111011110111101, 18},
> + {0b001111111110111100, 19},
> + {0b001111111110111101, 28},
> + };
> +
> + static compact_table_entry data_type_decompact[] = {
> + {0b001000000000000001, 0},
> + {0b001000000000100000, 1},
> + {0b001000000000100001, 2},
> + {0b001000000001100001, 3},
> + {0b001000000010111101, 4},
> + {0b001000001011111101, 5},
> + {0b001000001110100001, 6},
> + {0b001000001110100101, 7},
> + {0b001000001110111101, 8},
> + {0b001000010000100001, 9},
> + {0b001000110000100000, 10},
> + {0b001000110000100001, 11},
> + {0b001001010010100101, 12},
> + {0b001001110010100100, 13},
> + {0b001001110010100101, 14},
> + {0b001111001110111101, 15},
> + {0b001111011110011101, 16},
> + {0b001111011110111100, 17},
> + {0b001111011110111101, 18},
> + {0b001111111110111100, 19},
> + {0b000000001000001100, 20},
> + {0b001000000000111101, 21},
> + {0b001000000010100101, 22},
> + {0b001000010000100000, 23},
> + {0b001001010010100100, 24},
> + {0b001001110010000100, 25},
> + {0b001010010100001001, 26},
> + {0b001101111110111101, 27},
> + {0b001111111110111101, 28},
> + {0b001011110110101100, 29},
> + {0b001010010100101000, 30},
> + {0b001010110100101000, 31},
> + };
> +
> + static compact_table_entry subreg_table[] = {
> + {0b000000000000000, 0},
> + {0b000000000000001, 1},
> + {0b000000000001000, 2},
> + {0b000000000001111, 3},
> + {0b000000000010000, 4},
> + {0b000000010000000, 5},
> + {0b000000100000000, 6},
> + {0b000000110000000, 7},
> + {0b000001000000000, 8},
> + {0b000001000010000, 9},
> + {0b000001010000000, 10},
> + {0b001000000000000, 11},
> + {0b001000000000001, 12},
> + {0b001000010000001, 13},
> + {0b001000010000010, 14},
> + {0b001000010000011, 15},
> + {0b001000010000100, 16},
> + {0b001000010000111, 17},
> + {0b001000010001000, 18},
> + {0b001000010001110, 19},
> + {0b001000010001111, 20},
> + {0b001000110000000, 21},
> + {0b001000111101000, 22},
> + {0b010000000000000, 23},
> + {0b010000110000000, 24},
> + {0b011000000000000, 25},
> + {0b011110010000111, 26},
> + {0b100000000000000, 27},
> + {0b101000000000000, 28},
> + {0b110000000000000, 29},
> + {0b111000000000000, 30},
> + {0b111000000011100, 31},
> + };
> +
> + static compact_table_entry srcreg_table[] = {
> + {0b000000000000, 0},
> + {0b000000000010, 1},
> + {0b000000010000, 2},
> + {0b000000010010, 3},
> + {0b000000011000, 4},
> + {0b000000100000, 5},
> + {0b000000101000, 6},
> + {0b000001001000, 7},
> + {0b000001010000, 8},
> + {0b000001110000, 9},
> + {0b000001111000, 10},
> + {0b001100000000, 11},
> + {0b001100000010, 12},
> + {0b001100001000, 13},
> + {0b001100010000, 14},
> + {0b001100010010, 15},
> + {0b001100100000, 16},
> + {0b001100101000, 17},
> + {0b001100111000, 18},
> + {0b001101000000, 19},
> + {0b001101000010, 20},
> + {0b001101001000, 21},
> + {0b001101010000, 22},
> + {0b001101100000, 23},
> + {0b001101101000, 24},
> + {0b001101110000, 25},
> + {0b001101110001, 26},
> + {0b001101111000, 27},
> + {0b010001101000, 28},
> + {0b010001101001, 29},
> + {0b010001101010, 30},
> + {0b010110001000, 31},
> + };
> +
> + static int cmp_key(const void *p1, const void*p2) {
> + const compact_table_entry * px = (compact_table_entry *)p1;
> + const compact_table_entry * py = (compact_table_entry *)p2;
> + return (px->bit_pattern) - py->bit_pattern;
> + }
> + union ControlBits{
> + struct {
> + uint32_t access_mode:1;
> + uint32_t mask_control:1;
> + uint32_t dependency_control:2;
> + uint32_t quarter_control:2;
> + uint32_t thread_control:2;
> + uint32_t predicate_control:4;
> + uint32_t predicate_inverse:1;
> + uint32_t execution_size:3;
> + uint32_t saturate:1;
> + uint32_t flag_sub_reg_nr:1;
> + uint32_t flag_reg_nr:1;
> + uint32_t pad:23;
> + };
> + uint32_t data;
> + };
> + union DataTypeBits{
> + struct {
> + uint32_t dest_reg_file:2;
> + uint32_t dest_reg_type:3;
> + uint32_t src0_reg_file:2;
> + uint32_t src0_reg_type:3;
> + uint32_t src1_reg_file:2;
> + uint32_t src1_reg_type:3;
> + uint32_t dest_horiz_stride:2;
> + uint32_t dest_address_mode:1;
> + uint32_t pad:14;
> + };
> + uint32_t data;
> + };
> + union SubRegBits {
> + struct {
> + uint32_t dest_subreg_nr:5;
> + uint32_t src0_subreg_nr:5;
> + uint32_t src1_subreg_nr:5;
> + uint32_t pad:17;
> + };
> + uint32_t data;
> + };
> + union SrcRegBits {
> + struct {
> + uint32_t src_abs:1;
> + uint32_t src_negate:1;
> + uint32_t src_address_mode:1;
> + uint32_t src_horiz_stride:2;
> + uint32_t src_width:3;
> + uint32_t src_vert_stride:4;
> + uint32_t pad:20;
> + };
> + uint32_t data;
> + };
> +
> + void decompactInstruction(GenCompactInstruction * p, GenNativeInstruction *pOut) {
> +
> + memset(pOut, 0, sizeof(GenNativeInstruction));
> + union ControlBits control_bits;
> + control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern;
> + pOut->low.low = (uint32_t)p->bits1.opcode | ((control_bits.data & 0xffff) << 8);
> + pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod;
> + pOut->header.saturate = control_bits.saturate;
> + pOut->header.acc_wr_control = p->bits1.acc_wr_control;
> + pOut->header.cmpt_control = p->bits1.cmpt_control;
> + pOut->header.debug_control = p->bits1.debug_control;
> +
> + union DataTypeBits data_type_bits;
> + union SubRegBits subreg_bits;
> + union SrcRegBits src0_bits;
> + data_type_bits.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern;
> + subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern;
> + src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern;
> +
> + pOut->low.high |= data_type_bits.data & 0x7fff;
> + pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride;
> + pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode;
> + pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr;
> + pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr;
> +
> + pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr;
> + pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr;
> + pOut->high.low |= (src0_bits.data << 13);
> + pOut->bits2.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr;
> + pOut->bits2.da1.flag_reg_nr = control_bits.flag_reg_nr;
> +
> + if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) {
> + uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8);
> + pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm;
> + } else {
> + union SrcRegBits src1_bits;
> + src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern;
> + pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr;
> + pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr;
> + pOut->high.high |= (src1_bits.data << 13);
> + }
> + }
> +
> + int compactControlBits(GenEncoder *p, uint32_t quarter, uint32_t execWidth) {
> +
> + const GenInstructionState *s = &p->curr;
> + // some quick check
> + if(s->nibControl != 0)
> + return -1;
> + if(s->predicate > GEN_PREDICATE_NORMAL)
> + return -1;
> + if(s->flag == 1)
> + return -1;
> +
> + ControlBits b;
> + b.data = 0;
> +
> + if (execWidth == 8)
> + b.execution_size = GEN_WIDTH_8;
> + else if (execWidth == 16)
> + b.execution_size = GEN_WIDTH_16;
> + else if (execWidth == 1)
> + b.execution_size = GEN_WIDTH_1;
> + else
> + NOT_IMPLEMENTED;
> +
> + b.mask_control = s->noMask;
> + b.quarter_control = quarter;
> + b.predicate_control = s->predicate;
> + b.predicate_inverse = s->inversePredicate;
> +
> + b.saturate = s->saturate;
> + b.flag_sub_reg_nr = s->subFlag;
> + b.flag_reg_nr = s->flag;
> +
> + compact_table_entry key;
> + key.bit_pattern = b.data;
> +
> + compact_table_entry *r = (compact_table_entry *)bsearch(&key, control_table,
> + sizeof(control_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> + if (r == NULL)
> + return -1;
> + return r->index;
> + }
> +
> + int compactDataTypeBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
> +
> + // compact does not support any indirect access
> + if(dst->address_mode != GEN_ADDRESS_DIRECT)
> + return -1;
> +
> + if(src0->file == GEN_IMMEDIATE_VALUE)
> + return -1;
> +
> + DataTypeBits b;
> + b.data = 0;
> +
> + b.dest_horiz_stride = dst->hstride == GEN_HORIZONTAL_STRIDE_0 ? GEN_HORIZONTAL_STRIDE_1 : dst->hstride;
> + b.dest_address_mode = dst->address_mode;
> + b.dest_reg_file = dst->file;
> + b.dest_reg_type = dst->type;
> +
> + b.src0_reg_file = src0->file;
> + b.src0_reg_type = src0->type;
> +
> + if(src1) {
> + b.src1_reg_type = src1->type;
> + b.src1_reg_file = src1->file;
> + } else {
> + // default to zero
> + b.src1_reg_type = 0;
> + b.src1_reg_file = 0;
> + }
> +
> + compact_table_entry key;
> + key.bit_pattern = b.data;
> +
> + compact_table_entry *r = (compact_table_entry *)bsearch(&key, data_type_table,
> + sizeof(data_type_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> + if (r == NULL)
> + return -1;
> + return r->index;
> + }
> + int compactSubRegBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
> + SubRegBits b;
> + b.data = 0;
> + b.dest_subreg_nr = dst->subnr;
> + b.src0_subreg_nr = src0->subnr;
> + if(src1)
> + b.src1_subreg_nr = src1->subnr;
> + else
> + b.src1_subreg_nr = 0;
> +
> + compact_table_entry key;
> + key.bit_pattern = b.data;
> +
> + compact_table_entry *r = (compact_table_entry *)bsearch(&key, subreg_table,
> + sizeof(subreg_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> + if (r == NULL)
> + return -1;
> + return r->index;
> + }
> + int compactSrcRegBits(GenEncoder *p, GenRegister *src) {
> + // As we only use GEN_ALIGN_1 and compact only supports direct register access,
> + // we only need to verify [hstride, width, vstride]
> + if(src->file == GEN_IMMEDIATE_VALUE)
> + return -1;
> + if(src->address_mode != GEN_ADDRESS_DIRECT)
> + return -1;
> +
> + SrcRegBits b;
> + b.data = 0;
> + b.src_abs = src->absolute;
> + b.src_negate = src->negation;
> + b.src_address_mode = src->address_mode;
> + if(p->curr.execWidth == 1 && src->width == GEN_WIDTH_1) {
> + b.src_width = src->width;
> + b.src_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
> + b.src_vert_stride = GEN_VERTICAL_STRIDE_0;
> + }
> + else {
> + b.src_horiz_stride = src->hstride;
> + b.src_width = src->width;
> + b.src_vert_stride = src->vstride;
> + }
> + compact_table_entry key;
> + key.bit_pattern = b.data;
> +
> + compact_table_entry *r = (compact_table_entry *)bsearch(&key, srcreg_table,
> + sizeof(srcreg_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> + if (r == NULL)
> + return -1;
> + return r->index;
> + }
> +
> + // Try to emit a one-source ALU instruction in compact form.
> + // All table lookups are done first; only when every one of them succeeds is
> + // a compact instruction obtained from p->nextCompact(opcode) and filled in.
> + // Returns true when the compact instruction was written, false when the
> + // instruction cannot be compacted (nothing is emitted in that case).
> + bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split) {
> +   // TODO support it
> +   if(split)
> +     return false;
> +
> +   const int ctrl_idx = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
> +   if(ctrl_idx == -1)
> +     return false;
> +
> +   const int type_idx = compactDataTypeBits(p, &dst, &src, NULL);
> +   if(type_idx == -1)
> +     return false;
> +
> +   const int subreg_idx = compactSubRegBits(p, &dst, &src, NULL);
> +   if(subreg_idx == -1)
> +     return false;
> +
> +   const int src_idx = compactSrcRegBits(p, &src);
> +   if(src_idx == -1)
> +     return false;
> +
> +   GenCompactInstruction *compact = p->nextCompact(opcode);
> +   compact->bits1.control_index = ctrl_idx;
> +   compact->bits1.data_type_index = type_idx;
> +   compact->bits1.sub_reg_index = subreg_idx;
> +   compact->bits1.acc_wr_control = p->curr.accWrEnable;
> +   compact->bits1.destreg_or_condmod = condition;
> +   compact->bits1.cmpt_control = 1;
> +   // The source index is split: low two bits in bits1, the rest in bits2.
> +   compact->bits1.src0_index_lo = src_idx & 3;
> +   compact->bits2.src0_index_hi = src_idx >> 2;
> +
> +   // There is no second source, so its fields are zero.
> +   compact->bits2.src1_index = 0;
> +   compact->bits2.src1_reg_nr = 0;
> +   compact->bits2.dest_reg_nr = dst.nr;
> +   compact->bits2.src0_reg_nr = src.nr;
> +   return true;
> + }
> +
> + // Try to emit a two-source ALU instruction in compact form.
> + // src1 may be a register or an immediate. An immediate is accepted only when
> + // it carries no abs/negate modifier, is not of float type, and its value fits
> + // in a 13-bit signed integer; its bits are then stored in the src1_index
> + // (bits 12..8) and src1_reg_nr (bits 7..0) fields.
> + // Returns true when the compact instruction was written via
> + // p->nextCompact(opcode), false when the instruction cannot be compacted
> + // (nothing is emitted in that case).
> + bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split) {
> +   // TODO support it
> +   if(split)
> +     return false;
> +
> +   // IF, ENDIF and JMPI are never compacted.
> +   if(opcode == GEN_OPCODE_IF || opcode == GEN_OPCODE_ENDIF || opcode == GEN_OPCODE_JMPI) return false;
> +
> +   int control_index = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
> +   if(control_index == -1) return false;
> +
> +   int data_type_index = compactDataTypeBits(p, &dst, &src0, &src1);
> +   if(data_type_index == -1) return false;
> +
> +   int sub_reg_index = compactSubRegBits(p, &dst, &src0, &src1);
> +   if(sub_reg_index == -1) return false;
> +
> +   int src0_reg_index = compactSrcRegBits(p, &src0);
> +   if(src0_reg_index == -1) return false;
> +
> +   const bool src1_imm = (src1.file == GEN_IMMEDIATE_VALUE);
> +   // Initialized up front so the immediate path never holds an indeterminate value.
> +   int src1_reg_index = 0;
> +   if(src1_imm) {
> +     if(src1.absolute != 0 || src1.negation != 0 || src1.type == GEN_TYPE_F)
> +       return false;
> +     if(src1.value.d < -4096 || src1.value.d > 4095) // 13bit signed imm
> +       return false;
> +   } else {
> +     src1_reg_index = compactSrcRegBits(p, &src1);
> +     if(src1_reg_index == -1) return false;
> +   }
> +
> +   GenCompactInstruction * insn = p->nextCompact(opcode);
> +   insn->bits1.control_index = control_index;
> +   insn->bits1.data_type_index = data_type_index;
> +   insn->bits1.sub_reg_index = sub_reg_index;
> +   insn->bits1.acc_wr_control = p->curr.accWrEnable;
> +   insn->bits1.destreg_or_condmod = condition;
> +   insn->bits1.cmpt_control = 1;
> +   // The src0 index is split: low two bits in bits1, the rest in bits2.
> +   insn->bits1.src0_index_lo = src0_reg_index & 3;
> +
> +   insn->bits2.src0_index_hi = src0_reg_index >> 2;
> +   // For an immediate, the low 13 bits are split: bits 12..8 here, bits 7..0 in src1_reg_nr.
> +   insn->bits2.src1_index = src1_imm ? (src1.value.ud & 0x1fff) >> 8 : src1_reg_index;
> +   insn->bits2.dest_reg_nr = dst.nr;
> +   insn->bits2.src0_reg_nr = src0.nr;
> +   insn->bits2.src1_reg_nr = src1_imm ? (src1.value.ud & 0xff) : src1.nr;
> +   return true;
> + }
> +};
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index d0e3d0b..e04a2c2 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3089,7 +3089,7 @@ namespace gbe
> sel.push();
> sel.curr.noMask = 1;
> sel.curr.predicate = GEN_PREDICATE_NONE;
> - sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1);
> + sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
> sel.pop();
>
> if (sel.block->hasBarrier) {
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 937f5b2..0794d48 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -524,7 +524,7 @@ namespace gbe
> cmp0->state.subFlag = insn.state.subFlag;
> cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
> cmp0->src(1) = GenRegister::immuw(0);
> - cmp0->dst(0) = GenRegister::null();
> + cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
> cmp0->extra.function = GEN_CONDITIONAL_NEQ;
> insn.prepend(*cmp0);
> validatedFlags.insert(insn.state.flagIndex);
> @@ -545,7 +545,7 @@ namespace gbe
> cmp0->state.subFlag = insn.state.subFlag;
> cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
> cmp0->src(1) = GenRegister::immuw(0);
> - cmp0->dst(0) = GenRegister::null();
> + cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
> cmp0->extra.function = GEN_CONDITIONAL_NEQ;
> insn.prepend(*cmp0);
> }
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 0480dd8..6863aab 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -551,13 +551,13 @@ namespace gbe
>
> static INLINE GenRegister immuw(uint16_t uw) {
> GenRegister immediate = imm(GEN_TYPE_UW);
> - immediate.value.ud = uw | (uw << 16);
> + immediate.value.ud = uw;
> return immediate;
> }
>
> static INLINE GenRegister immw(int16_t w) {
> GenRegister immediate = imm(GEN_TYPE_W);
> - immediate.value.d = w | (w << 16);
> + immediate.value.d = w;
> return immediate;
> }
>
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list