[Beignet] [PATCH] GBE: Implement instruction compact.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Apr 20 18:08:36 PDT 2014


This patch LGTM. Good work, Ruiling.

One minor comment: I noticed that you put the following statement in the file
you newly created.
> + * Author: Benjamin Segovia <benjamin.segovia at intel.com>

I assume it is a copy/paste error, right? I will correct it for you and
push the patch later. Thanks.

On Tue, Apr 15, 2014 at 04:53:17PM +0800, Ruiling Song wrote:
> A native GEN ASM instruction takes 2*64 bits, but GEN also supports compact
> instructions, which take only 64 bits. To make the code easier to understand,
> GenInstruction now only stands for 64 bits of memory, and GenNativeInstruction &
> GenCompactInstruction are used to represent normal (native) and compact instructions.
> 
> After this change, it is not easy to map a SelectionInstruction distance to an ASM
> distance, as the instructions within that distance may be compacted. To avoid
> introducing too much complexity, JMP, IF, ENDIF, NOP will NEVER be compacted.
> 
> Some experiments with LuxMark show it can reduce instruction memory by about 20%.
> Unfortunately, no performance improvement was observed.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/CMakeLists.txt                 |    1 +
>  backend/src/backend/gen/gen_mesa_disasm.c  |   20 +-
>  backend/src/backend/gen_context.cpp        |   41 +-
>  backend/src/backend/gen_defs.hpp           |  953 +++++++++++++++-------------
>  backend/src/backend/gen_encoder.cpp        |  116 ++--
>  backend/src/backend/gen_encoder.hpp        |   11 +-
>  backend/src/backend/gen_insn_compact.cpp   |  521 +++++++++++++++
>  backend/src/backend/gen_insn_selection.cpp |    2 +-
>  backend/src/backend/gen_reg_allocation.cpp |    4 +-
>  backend/src/backend/gen_register.hpp       |    4 +-
>  10 files changed, 1133 insertions(+), 540 deletions(-)
>  create mode 100644 backend/src/backend/gen_insn_compact.cpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index d6f2d3c..9c96f33 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -162,6 +162,7 @@ else (GBE_USE_BLOB)
>      backend/gen_program.hpp
>      backend/gen_program.h
>      backend/gen_defs.hpp
> +    backend/gen_insn_compact.cpp
>      backend/gen_encoder.hpp
>      backend/gen_encoder.cpp)
>  
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index e58ef31..871277b 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -533,7 +533,7 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
>    return err;
>  }
>  
> -static int dest (FILE *file, const struct GenInstruction *inst)
> +static int dest (FILE *file, const union GenNativeInstruction *inst)
>  {
>    int	err = 0;
>  
> @@ -587,7 +587,7 @@ static int dest (FILE *file, const struct GenInstruction *inst)
>    return 0;
>  }
>  
> -static int dest_3src (FILE *file, const struct GenInstruction *inst)
> +static int dest_3src (FILE *file, const union GenNativeInstruction *inst)
>  {
>    int	err = 0;
>    const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE;
> @@ -720,7 +720,7 @@ static int src_da16 (FILE *file,
>    return err;
>  }
>  
> -static int src0_3src (FILE *file, const struct GenInstruction *inst)
> +static int src0_3src (FILE *file, const union GenNativeInstruction *inst)
>  {
>    int err = 0;
>    uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
> @@ -768,7 +768,7 @@ static int src0_3src (FILE *file, const struct GenInstruction *inst)
>    return err;
>  }
>  
> -static int src1_3src (FILE *file, const struct GenInstruction *inst)
> +static int src1_3src (FILE *file, const union GenNativeInstruction *inst)
>  {
>    int err = 0;
>    uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
> @@ -821,7 +821,7 @@ static int src1_3src (FILE *file, const struct GenInstruction *inst)
>  }
>  
>  
> -static int src2_3src (FILE *file, const struct GenInstruction *inst)
> +static int src2_3src (FILE *file, const union GenNativeInstruction *inst)
>  {
>    int err = 0;
>    uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
> @@ -871,7 +871,7 @@ static int src2_3src (FILE *file, const struct GenInstruction *inst)
>    return err;
>  }
>  
> -static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
> +static int imm (FILE *file, uint32_t type, const union GenNativeInstruction *inst) {
>    switch (type) {
>      case GEN_TYPE_UD:
>        format (file, "0x%xUD", inst->bits3.ud);
> @@ -900,7 +900,7 @@ static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
>    return 0;
>  }
>  
> -static int src0 (FILE *file, const struct GenInstruction *inst)
> +static int src0 (FILE *file, const union GenNativeInstruction *inst)
>  {
>    if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE)
>      return imm (file, inst->bits1.da1.src0_reg_type,
> @@ -960,7 +960,7 @@ static int src0 (FILE *file, const struct GenInstruction *inst)
>    }
>  }
>  
> -static int src1 (FILE *file, const struct GenInstruction *inst)
> +static int src1 (FILE *file, const union GenNativeInstruction *inst)
>  {
>    if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE)
>      return imm (file, inst->bits1.da1.src1_reg_type,
> @@ -1029,7 +1029,7 @@ static const int esize[6] = {
>    [5] = 32,
>  };
>  
> -static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
> +static int qtr_ctrl(FILE *file, const union GenNativeInstruction *inst)
>  {
>    int qtr_ctl = inst->header.quarter_control;
>    int exec_size = esize[inst->header.execution_size];
> @@ -1060,7 +1060,7 @@ static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
>  
>  int gen_disasm (FILE *file, const void *opaque_insn)
>  {
> -  const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn;
> +  const union GenNativeInstruction *inst = (const union GenNativeInstruction *) opaque_insn;
>    int	err = 0;
>    int space = 0;
>    int gen = 7;
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 50f10c5..f8292d6 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -39,6 +39,7 @@
>  
>  namespace gbe
>  {
> +  extern void decompactInstruction(union GenCompactInstruction *p, union GenNativeInstruction *pOut);
>    ///////////////////////////////////////////////////////////////////////////
>    // GenContext implementation
>    ///////////////////////////////////////////////////////////////////////////
> @@ -88,16 +89,18 @@ namespace gbe
>        const LabelIndex label = pair.first;
>        const int32_t insnID = pair.second;
>        const int32_t targetID = labelPos.find(label)->second;
> -      p->patchJMPI(insnID, (targetID - insnID) * 2);
> +      p->patchJMPI(insnID, (targetID - insnID));
>      }
>      for (auto pair : branchPos3) {
>        const LabelPair labelPair = pair.first;
>        const int32_t insnID = pair.second;
> -      const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0;
> -      const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1;
> -      assert((jip - insnID) * 2 < 32767 && (jip - insnID) * 2 > -32768);
> -      assert((uip - insnID) * 2 < 32767 && (uip - insnID) * 2 > -32768);
> -      p->patchJMPI(insnID, (((uip - insnID) * 2) << 16) | ((jip - insnID) * 2));
> +      // FIXME the 'labelPair' implementation must be fixed, as it is hard to
> +      // convert InstructionSelection offset to ASM offset since asm maybe compacted
> +      const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0*2;
> +      const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1*2;
> +      assert((jip - insnID) < 32767 && (jip - insnID) > -32768);
> +      assert((uip - insnID) < 32767 && (uip - insnID) > -32768);
> +      p->patchJMPI(insnID, (((uip - insnID)) << 16) | ((jip - insnID)));
>      }
>    }
>  
> @@ -975,7 +978,7 @@ namespace gbe
>        p->SHL(high, low, tmp);
>        p->MOV(low, GenRegister::immud(0));
>  
> -      p->patchJMPI(jip1, (p->n_instruction() - jip1) * 2);
> +      p->patchJMPI(jip1, (p->n_instruction() - jip1) );
>        p->curr.predicate = GEN_PREDICATE_NONE;
>        p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(31));  //update dst where high != 0
>        p->curr.predicate = GEN_PREDICATE_NORMAL;
> @@ -989,7 +992,7 @@ namespace gbe
>        p->CMP(GEN_CONDITIONAL_EQ, high, GenRegister::immud(0x80000000));
>        p->CMP(GEN_CONDITIONAL_EQ, low, GenRegister::immud(0x0));
>        p->AND(dst_ud, dst_ud, GenRegister::immud(0xfffffffe));
> -      p->patchJMPI(jip0, (p->n_instruction() - jip0) * 2);
> +      p->patchJMPI(jip0, (p->n_instruction() - jip0));
>  
>      p->pop();
>  
> @@ -1426,6 +1429,7 @@ namespace gbe
>      GenRegister zero = GenRegister::immud(0),
>                  one = GenRegister::immud(1),
>                  imm31 = GenRegister::immud(31);
> +    uint32_t jip0;
>      // (a,b) <- x
>      loadTopHalf(a, x);
>      loadBottomHalf(b, x);
> @@ -1516,10 +1520,11 @@ namespace gbe
>          p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
>        else
>          NOT_IMPLEMENTED;
> -      int jip = -(int)(p->n_instruction() - loop_start + 1) * 2;
> +      int distance = -(int)(p->n_instruction() - loop_start );
>        p->curr.noMask = 1;
> +      jip0 = p->n_instruction();
>        p->JMPI(zero);
> -      p->patchJMPI(p->n_instruction() - 1, jip + 2);
> +      p->patchJMPI(jip0, distance);
>        p->pop();
>        // end of loop
>      }
> @@ -2001,14 +2006,24 @@ namespace gbe
>      if (OCL_OUTPUT_ASM) {
>        std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
>        ir::LabelIndex curLabel = (ir::LabelIndex)0;
> +      GenCompactInstruction * pCom = NULL;
> +      GenNativeInstruction insn;
>        std::cout << "  L0:" << std::endl;
> -      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID) {
> +      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
>          if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
>            std::cout << "  L" << curLabel + 1 << ":" << std::endl;
>            curLabel = (ir::LabelIndex)(curLabel + 1);
>          }
> -        std::cout << "    (" << std::setw(8) << insnID * 2 << ")  ";
> -        gen_disasm(stdout, &p->store[insnID]);
> +        std::cout << "    (" << std::setw(8) << insnID << ")  ";
> +        pCom = (GenCompactInstruction*)&p->store[insnID];
> +        if(pCom->bits1.cmpt_control == 1) {
> +          decompactInstruction(pCom, &insn);
> +          gen_disasm(stdout, &insn);
> +          insnID++;
> +        } else {
> +          gen_disasm(stdout, &p->store[insnID]);
> +          insnID = insnID + 2;
> +        }
>        }
>        std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
>      }
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index e731174..4ad1cd1 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -436,475 +436,512 @@ enum GenMessageTarget {
>  #define GEN_MAX_GRF 128
>  
>  /* Instruction format for the execution units */
> -struct GenInstruction
> -{
> -  struct {
> -    uint32_t opcode:7;
> -    uint32_t pad:1;
> -    uint32_t access_mode:1;
> -    uint32_t mask_control:1;
> -    uint32_t dependency_control:2;
> -    uint32_t quarter_control:2;
> -    uint32_t thread_control:2;
> -    uint32_t predicate_control:4;
> -    uint32_t predicate_inverse:1;
> -    uint32_t execution_size:3;
> -    uint32_t destreg_or_condmod:4;
> -    uint32_t acc_wr_control:1;
> -    uint32_t cmpt_control:1;
> -    uint32_t debug_control:1;
> -    uint32_t saturate:1;
> -  } header;
> -
> -  union {
> -    struct {
> -      uint32_t dest_reg_file:2;
> -      uint32_t dest_reg_type:3;
> -      uint32_t src0_reg_file:2;
> -      uint32_t src0_reg_type:3;
> -      uint32_t src1_reg_file:2;
> -      uint32_t src1_reg_type:3;
> -      uint32_t nib_ctrl:1;
> -      uint32_t dest_subreg_nr:5;
> -      uint32_t dest_reg_nr:8;
> -      uint32_t dest_horiz_stride:2;
> -      uint32_t dest_address_mode:1;
> -    } da1;
> -
> -    struct {
> -      uint32_t dest_reg_file:2;
> -      uint32_t dest_reg_type:3;
> -      uint32_t src0_reg_file:2;
> -      uint32_t src0_reg_type:3;
> -      uint32_t src1_reg_file:2;        /* 0x00000c00 */
> -      uint32_t src1_reg_type:3;        /* 0x00007000 */
> -      uint32_t nib_ctrl:1;
> -      int dest_indirect_offset:10;        /* offset against the deref'd address reg */
> -      uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
> -      uint32_t dest_horiz_stride:2;
> -      uint32_t dest_address_mode:1;
> -    } ia1;
> -
> -    struct {
> -      uint32_t dest_reg_file:2;
> -      uint32_t dest_reg_type:3;
> -      uint32_t src0_reg_file:2;
> -      uint32_t src0_reg_type:3;
> -      uint32_t src1_reg_file:2;
> -      uint32_t src1_reg_type:3;
> -      uint32_t nib_ctrl:1;
> -      uint32_t dest_writemask:4;
> -      uint32_t dest_subreg_nr:1;
> -      uint32_t dest_reg_nr:8;
> -      uint32_t dest_horiz_stride:2;
> -      uint32_t dest_address_mode:1;
> -    } da16;
>  
> -    struct {
> -      uint32_t dest_reg_file:2;
> -      uint32_t dest_reg_type:3;
> -      uint32_t src0_reg_file:2;
> -      uint32_t src0_reg_type:3;
> -      uint32_t nib_ctrl:1;
> -      uint32_t dest_writemask:4;
> -      int dest_indirect_offset:6;
> -      uint32_t dest_subreg_nr:3;
> -      uint32_t dest_horiz_stride:2;
> -      uint32_t dest_address_mode:1;
> -    } ia16;
> +struct GenInstruction {
> +  uint32_t low;
> +  uint32_t high;
> +};
>  
> +union GenCompactInstruction {
> +  struct GenInstruction low;
> +  struct {
>      struct {
> -      uint32_t dest_reg_file:2;
> -      uint32_t dest_reg_type:3;
> -      uint32_t src0_reg_file:2;
> -      uint32_t src0_reg_type:3;
> -      uint32_t src1_reg_file:2;
> -      uint32_t src1_reg_type:3;
> +      uint32_t opcode:7;
> +      uint32_t debug_control:1;
> +      uint32_t control_index:5;
> +      uint32_t data_type_index:5;
> +      uint32_t sub_reg_index:5;
> +      uint32_t acc_wr_control:1;
> +      uint32_t destreg_or_condmod:4;
>        uint32_t pad:1;
> -      int jump_count:16;
> -    } branch_gen6;
> -
> +      uint32_t cmpt_control:1;
> +      uint32_t src0_index_lo:2;
> +    } bits1;
>      struct {
> -      uint32_t dest_reg_file:1;
> -      uint32_t flag_subreg_num:1;
> -      uint32_t pad0:2;
> -      uint32_t src0_abs:1;
> -      uint32_t src0_negate:1;
> -      uint32_t src1_abs:1;
> -      uint32_t src1_negate:1;
> -      uint32_t src2_abs:1;
> -      uint32_t src2_negate:1;
> -      uint32_t pad1:7;
> -      uint32_t dest_writemask:4;
> -      uint32_t dest_subreg_nr:3;
> +      uint32_t src0_index_hi:3;
> +      uint32_t src1_index:5;
>        uint32_t dest_reg_nr:8;
> -    } da3src;
> -  } bits1;
> -
> -  union {
> -    struct {
> -      uint32_t src0_subreg_nr:5;
>        uint32_t src0_reg_nr:8;
> -      uint32_t src0_abs:1;
> -      uint32_t src0_negate:1;
> -      uint32_t src0_address_mode:1;
> -      uint32_t src0_horiz_stride:2;
> -      uint32_t src0_width:3;
> -      uint32_t src0_vert_stride:4;
> -      uint32_t flag_sub_reg_nr:1;
> -      uint32_t flag_reg_nr:1;
> -      uint32_t pad:5;
> -    } da1;
> -
> -    struct {
> -      int src0_indirect_offset:10;
> -      uint32_t src0_subreg_nr:3;
> -      uint32_t src0_abs:1;
> -      uint32_t src0_negate:1;
> -      uint32_t src0_address_mode:1;
> -      uint32_t src0_horiz_stride:2;
> -      uint32_t src0_width:3;
> -      uint32_t src0_vert_stride:4;
> -      uint32_t flag_sub_reg_nr:1;
> -      uint32_t flag_reg_nr:1;
> -      uint32_t pad:5;
> -    } ia1;
> -
> -    struct {
> -      uint32_t src0_swz_x:2;
> -      uint32_t src0_swz_y:2;
> -      uint32_t src0_subreg_nr:1;
> -      uint32_t src0_reg_nr:8;
> -      uint32_t src0_abs:1;
> -      uint32_t src0_negate:1;
> -      uint32_t src0_address_mode:1;
> -      uint32_t src0_swz_z:2;
> -      uint32_t src0_swz_w:2;
> -      uint32_t pad0:1;
> -      uint32_t src0_vert_stride:4;
> -      uint32_t flag_sub_reg_nr:1;
> -      uint32_t flag_reg_nr:1;
> -      uint32_t pad:5;
> -    } da16;
> -
> -    struct {
> -      uint32_t src0_swz_x:2;
> -      uint32_t src0_swz_y:2;
> -      int src0_indirect_offset:6;
> -      uint32_t src0_subreg_nr:3;
> -      uint32_t src0_abs:1;
> -      uint32_t src0_negate:1;
> -      uint32_t src0_address_mode:1;
> -      uint32_t src0_swz_z:2;
> -      uint32_t src0_swz_w:2;
> -      uint32_t pad0:1;
> -      uint32_t src0_vert_stride:4;
> -      uint32_t flag_sub_reg_nr:1;
> -      uint32_t flag_reg_nr:1;
> -      uint32_t pad:5;
> -    } ia16;
> -
> -    struct {
> -      uint32_t src0_rep_ctrl:1;
> -      uint32_t src0_swizzle:8;
> -      uint32_t src0_subreg_nr:3;
> -      uint32_t src0_reg_nr:8;
> -      uint32_t pad0:1;
> -      uint32_t src1_rep_ctrl:1;
> -      uint32_t src1_swizzle:8;
> -      uint32_t src1_subreg_nr_low:2;
> -    } da3src;
> -  } bits2;
> -
> -  union {
> -    struct {
> -      uint32_t src1_subreg_nr:5;
>        uint32_t src1_reg_nr:8;
> -      uint32_t src1_abs:1;
> -      uint32_t src1_negate:1;
> -      uint32_t src1_address_mode:1;
> -      uint32_t src1_horiz_stride:2;
> -      uint32_t src1_width:3;
> -      uint32_t src1_vert_stride:4;
> -      uint32_t pad0:7;
> -    } da1;
> -
> -    struct {
> -      uint32_t src1_swz_x:2;
> -      uint32_t src1_swz_y:2;
> -      uint32_t src1_subreg_nr:1;
> -      uint32_t src1_reg_nr:8;
> -      uint32_t src1_abs:1;
> -      uint32_t src1_negate:1;
> -      uint32_t src1_address_mode:1;
> -      uint32_t src1_swz_z:2;
> -      uint32_t src1_swz_w:2;
> -      uint32_t pad1:1;
> -      uint32_t src1_vert_stride:4;
> -      uint32_t pad2:7;
> -    } da16;
> -
> -    struct {
> -      int  src1_indirect_offset:10;
> -      uint32_t src1_subreg_nr:3;
> -      uint32_t src1_abs:1;
> -      uint32_t src1_negate:1;
> -      uint32_t src1_address_mode:1;
> -      uint32_t src1_horiz_stride:2;
> -      uint32_t src1_width:3;
> -      uint32_t src1_vert_stride:4;
> -      uint32_t pad1:7;
> -    } ia1;
> -
> -    struct {
> -      uint32_t src1_swz_x:2;
> -      uint32_t src1_swz_y:2;
> -      int  src1_indirect_offset:6;
> -      uint32_t src1_subreg_nr:3;
> -      uint32_t src1_abs:1;
> -      uint32_t src1_negate:1;
> -      uint32_t pad0:1;
> -      uint32_t src1_swz_z:2;
> -      uint32_t src1_swz_w:2;
> -      uint32_t pad1:1;
> -      uint32_t src1_vert_stride:4;
> -      uint32_t pad2:7;
> -    } ia16;
> -
> -    struct {
> -      uint32_t function_control:19;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad1:2;
> -      uint32_t end_of_thread:1;
> -    } generic_gen5;
> -
> -    struct {
> -      uint32_t sub_function_id:3;
> -      uint32_t pad0:11;
> -      uint32_t ack_req:1;
> -      uint32_t notify:2;
> -      uint32_t pad1:2;
> -      uint32_t header:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } msg_gateway;
> -
> -    struct {
> -      uint32_t opcode:1;
> -      uint32_t request:1;
> -      uint32_t pad0:2;
> -      uint32_t resource:1;
> -      uint32_t pad1:14;
> -      uint32_t header:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } spawner_gen5;
> -
> -    /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
> -    struct {
> -      uint32_t function:4;
> -      uint32_t int_type:1;
> -      uint32_t precision:1;
> -      uint32_t saturate:1;
> -      uint32_t data_type:1;
> -      uint32_t snapshot:1;
> -      uint32_t pad0:10;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad1:2;
> -      uint32_t end_of_thread:1;
> -    } math_gen5;
> +    } bits2;
> +  };
> +};
>  
> +union GenNativeInstruction
> +{
> +  struct {
> +    struct GenInstruction low;
> +    struct GenInstruction high;
> +  };
> +  struct {
>      struct {
> -      uint32_t bti:8;
> -      uint32_t sampler:4;
> -      uint32_t msg_type:5;
> -      uint32_t simd_mode:2;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad1:2;
> -      uint32_t end_of_thread:1;
> -    } sampler_gen7;
> -
> -    /**
> -     * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> -     *
> -     * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
> -     **/
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t msg_control:5;
> -      uint32_t msg_type:3;
> -      uint32_t pad0:3;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad1:2;
> -      uint32_t end_of_thread:1;
> -    } gen6_dp_sampler_const_cache;
> -
> -    /*! Data port untyped read / write messages */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t rgba:4;
> -      uint32_t simd_mode:2;
> -      uint32_t msg_type:4;
> -      uint32_t category:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_untyped_rw;
> -
> -    /*! Data port byte scatter / gather */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t simd_mode:1;
> -      uint32_t ignored0:1;
> -      uint32_t data_size:2;
> -      uint32_t ignored1:2;
> -      uint32_t msg_type:4;
> -      uint32_t category:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_byte_rw;
> -
> -    /*! Data port Scratch Read/ write */
> -    struct {
> -      uint32_t offset:12;
> -      uint32_t block_size:2;
> -      uint32_t ignored0:1;
> -      uint32_t invalidate_after_read:1;
> -      uint32_t channel_mode:1;
> -      uint32_t msg_type:1;
> -      uint32_t category:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_scratch_rw;
> -
> -    /*! Data port OBlock read / write */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t block_size:3;
> -      uint32_t ignored:2;
> -      uint32_t invalidate_after_read:1;
> -      uint32_t msg_type:4;
> -      uint32_t category:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_oblock_rw;
> -
> -    /*! Data port dword scatter / gather */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t block_size:2;
> -      uint32_t ignored0:3;
> -      uint32_t invalidate_after_read:1;
> -      uint32_t msg_type:4;
> -      uint32_t ignored1:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad2:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_dword_rw;
> -
> -    /*! Data port typed read / write messages */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t chan_mask:4;
> +      uint32_t opcode:7;
>        uint32_t pad:1;
> -      uint32_t slot:1;
> -      uint32_t msg_type:4;
> -      uint32_t pad2:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad3:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_typed_rw;
> -
> -    /*! Memory fence */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t pad:5;
> -      uint32_t commit_enable:1;
> -      uint32_t msg_type:4;
> -      uint32_t pad2:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad3:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_memory_fence;
> -
> -    /*! atomic messages */
> -    struct {
> -      uint32_t bti:8;
> -      uint32_t aop_type:4;
> -      uint32_t simd_mode:1;
> -      uint32_t return_data:1;
> -      uint32_t msg_type:4;
> -      uint32_t category:1;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad3:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_atomic_op;
> -
> -    struct {
> -      uint32_t src1_subreg_nr_high:1;
> -      uint32_t src1_reg_nr:8;
> -      uint32_t pad0:1;
> -      uint32_t src2_rep_ctrl:1;
> -      uint32_t src2_swizzle:8;
> -      uint32_t src2_subreg_nr:3;
> -      uint32_t src2_reg_nr:8;
> -      uint32_t pad1:2;
> -    } da3src;
> -
> -    /*! Message gateway */
> -    struct {
> -      uint32_t subfunc:3;
> -      uint32_t pad:11;
> -      uint32_t ackreq:1;
> -      uint32_t notify:2;
> -      uint32_t pad2:2;
> -      uint32_t header_present:1;
> -      uint32_t response_length:5;
> -      uint32_t msg_length:4;
> -      uint32_t pad3:2;
> -      uint32_t end_of_thread:1;
> -    } gen7_msg_gw;
> -
> -    struct {
> -      uint32_t jip:16;
> -      uint32_t uip:16;
> -    } gen7_branch;
> -
> -    int d;
> -    uint32_t ud;
> -    float f;
> -  } bits3;
> +      uint32_t access_mode:1;
> +      uint32_t mask_control:1;
> +      uint32_t dependency_control:2;
> +      uint32_t quarter_control:2;
> +      uint32_t thread_control:2;
> +      uint32_t predicate_control:4;
> +      uint32_t predicate_inverse:1;
> +      uint32_t execution_size:3;
> +      uint32_t destreg_or_condmod:4;
> +      uint32_t acc_wr_control:1;
> +      uint32_t cmpt_control:1;
> +      uint32_t debug_control:1;
> +      uint32_t saturate:1;
> +    } header;
> +
> +    union {
> +      struct {
> +        uint32_t dest_reg_file:2;
> +        uint32_t dest_reg_type:3;
> +        uint32_t src0_reg_file:2;
> +        uint32_t src0_reg_type:3;
> +        uint32_t src1_reg_file:2;
> +        uint32_t src1_reg_type:3;
> +        uint32_t nib_ctrl:1;
> +        uint32_t dest_subreg_nr:5;
> +        uint32_t dest_reg_nr:8;
> +        uint32_t dest_horiz_stride:2;
> +        uint32_t dest_address_mode:1;
> +      } da1;
> +
> +      struct {
> +        uint32_t dest_reg_file:2;
> +        uint32_t dest_reg_type:3;
> +        uint32_t src0_reg_file:2;
> +        uint32_t src0_reg_type:3;
> +        uint32_t src1_reg_file:2;        /* 0x00000c00 */
> +        uint32_t src1_reg_type:3;        /* 0x00007000 */
> +        uint32_t nib_ctrl:1;
> +        int dest_indirect_offset:10;        /* offset against the deref'd address reg */
> +        uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
> +        uint32_t dest_horiz_stride:2;
> +        uint32_t dest_address_mode:1;
> +      } ia1;
> +
> +      struct {
> +        uint32_t dest_reg_file:2;
> +        uint32_t dest_reg_type:3;
> +        uint32_t src0_reg_file:2;
> +        uint32_t src0_reg_type:3;
> +        uint32_t src1_reg_file:2;
> +        uint32_t src1_reg_type:3;
> +        uint32_t nib_ctrl:1;
> +        uint32_t dest_writemask:4;
> +        uint32_t dest_subreg_nr:1;
> +        uint32_t dest_reg_nr:8;
> +        uint32_t dest_horiz_stride:2;
> +        uint32_t dest_address_mode:1;
> +      } da16;
> +
> +      struct {
> +        uint32_t dest_reg_file:2;
> +        uint32_t dest_reg_type:3;
> +        uint32_t src0_reg_file:2;
> +        uint32_t src0_reg_type:3;
> +        uint32_t nib_ctrl:1;
> +        uint32_t dest_writemask:4;
> +        int dest_indirect_offset:6;
> +        uint32_t dest_subreg_nr:3;
> +        uint32_t dest_horiz_stride:2;
> +        uint32_t dest_address_mode:1;
> +      } ia16;
> +
> +      struct {
> +        uint32_t dest_reg_file:2;
> +        uint32_t dest_reg_type:3;
> +        uint32_t src0_reg_file:2;
> +        uint32_t src0_reg_type:3;
> +        uint32_t src1_reg_file:2;
> +        uint32_t src1_reg_type:3;
> +        uint32_t pad:1;
> +        int jump_count:16;
> +      } branch_gen6;
> +
> +      struct {
> +        uint32_t dest_reg_file:1;
> +        uint32_t flag_subreg_num:1;
> +        uint32_t pad0:2;
> +        uint32_t src0_abs:1;
> +        uint32_t src0_negate:1;
> +        uint32_t src1_abs:1;
> +        uint32_t src1_negate:1;
> +        uint32_t src2_abs:1;
> +        uint32_t src2_negate:1;
> +        uint32_t pad1:7;
> +        uint32_t dest_writemask:4;
> +        uint32_t dest_subreg_nr:3;
> +        uint32_t dest_reg_nr:8;
> +      } da3src;
> +    } bits1;
> +
> +    union {
> +      struct {
> +        uint32_t src0_subreg_nr:5;
> +        uint32_t src0_reg_nr:8;
> +        uint32_t src0_abs:1;
> +        uint32_t src0_negate:1;
> +        uint32_t src0_address_mode:1;
> +        uint32_t src0_horiz_stride:2;
> +        uint32_t src0_width:3;
> +        uint32_t src0_vert_stride:4;
> +        uint32_t flag_sub_reg_nr:1;
> +        uint32_t flag_reg_nr:1;
> +        uint32_t pad:5;
> +      } da1;
> +
> +      struct {
> +        int src0_indirect_offset:10;
> +        uint32_t src0_subreg_nr:3;
> +        uint32_t src0_abs:1;
> +        uint32_t src0_negate:1;
> +        uint32_t src0_address_mode:1;
> +        uint32_t src0_horiz_stride:2;
> +        uint32_t src0_width:3;
> +        uint32_t src0_vert_stride:4;
> +        uint32_t flag_sub_reg_nr:1;
> +        uint32_t flag_reg_nr:1;
> +        uint32_t pad:5;
> +      } ia1;
> +
> +      struct {
> +        uint32_t src0_swz_x:2;
> +        uint32_t src0_swz_y:2;
> +        uint32_t src0_subreg_nr:1;
> +        uint32_t src0_reg_nr:8;
> +        uint32_t src0_abs:1;
> +        uint32_t src0_negate:1;
> +        uint32_t src0_address_mode:1;
> +        uint32_t src0_swz_z:2;
> +        uint32_t src0_swz_w:2;
> +        uint32_t pad0:1;
> +        uint32_t src0_vert_stride:4;
> +        uint32_t flag_sub_reg_nr:1;
> +        uint32_t flag_reg_nr:1;
> +        uint32_t pad:5;
> +      } da16;
> +
> +      struct {
> +        uint32_t src0_swz_x:2;
> +        uint32_t src0_swz_y:2;
> +        int src0_indirect_offset:6;
> +        uint32_t src0_subreg_nr:3;
> +        uint32_t src0_abs:1;
> +        uint32_t src0_negate:1;
> +        uint32_t src0_address_mode:1;
> +        uint32_t src0_swz_z:2;
> +        uint32_t src0_swz_w:2;
> +        uint32_t pad0:1;
> +        uint32_t src0_vert_stride:4;
> +        uint32_t flag_sub_reg_nr:1;
> +        uint32_t flag_reg_nr:1;
> +        uint32_t pad:5;
> +      } ia16;
> +
> +      struct {
> +        uint32_t src0_rep_ctrl:1;
> +        uint32_t src0_swizzle:8;
> +        uint32_t src0_subreg_nr:3;
> +        uint32_t src0_reg_nr:8;
> +        uint32_t pad0:1;
> +        uint32_t src1_rep_ctrl:1;
> +        uint32_t src1_swizzle:8;
> +        uint32_t src1_subreg_nr_low:2;
> +      } da3src;
> +    } bits2;
> +
> +    union {
> +      struct {
> +        uint32_t src1_subreg_nr:5;
> +        uint32_t src1_reg_nr:8;
> +        uint32_t src1_abs:1;
> +        uint32_t src1_negate:1;
> +        uint32_t src1_address_mode:1;
> +        uint32_t src1_horiz_stride:2;
> +        uint32_t src1_width:3;
> +        uint32_t src1_vert_stride:4;
> +        uint32_t pad0:7;
> +      } da1;
> +
> +      struct {
> +        uint32_t src1_swz_x:2;
> +        uint32_t src1_swz_y:2;
> +        uint32_t src1_subreg_nr:1;
> +        uint32_t src1_reg_nr:8;
> +        uint32_t src1_abs:1;
> +        uint32_t src1_negate:1;
> +        uint32_t src1_address_mode:1;
> +        uint32_t src1_swz_z:2;
> +        uint32_t src1_swz_w:2;
> +        uint32_t pad1:1;
> +        uint32_t src1_vert_stride:4;
> +        uint32_t pad2:7;
> +      } da16;
> +
> +      struct {
> +        int  src1_indirect_offset:10;
> +        uint32_t src1_subreg_nr:3;
> +        uint32_t src1_abs:1;
> +        uint32_t src1_negate:1;
> +        uint32_t src1_address_mode:1;
> +        uint32_t src1_horiz_stride:2;
> +        uint32_t src1_width:3;
> +        uint32_t src1_vert_stride:4;
> +        uint32_t pad1:7;
> +      } ia1;
> +
> +      struct {
> +        uint32_t src1_swz_x:2;
> +        uint32_t src1_swz_y:2;
> +        int  src1_indirect_offset:6;
> +        uint32_t src1_subreg_nr:3;
> +        uint32_t src1_abs:1;
> +        uint32_t src1_negate:1;
> +        uint32_t pad0:1;
> +        uint32_t src1_swz_z:2;
> +        uint32_t src1_swz_w:2;
> +        uint32_t pad1:1;
> +        uint32_t src1_vert_stride:4;
> +        uint32_t pad2:7;
> +      } ia16;
> +
> +      struct {
> +        uint32_t function_control:19;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } generic_gen5;
> +
> +      struct {
> +        uint32_t sub_function_id:3;
> +        uint32_t pad0:11;
> +        uint32_t ack_req:1;
> +        uint32_t notify:2;
> +        uint32_t pad1:2;
> +        uint32_t header:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } msg_gateway;
> +
> +      struct {
> +        uint32_t opcode:1;
> +        uint32_t request:1;
> +        uint32_t pad0:2;
> +        uint32_t resource:1;
> +        uint32_t pad1:14;
> +        uint32_t header:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } spawner_gen5;
> +
> +      /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
> +      struct {
> +        uint32_t function:4;
> +        uint32_t int_type:1;
> +        uint32_t precision:1;
> +        uint32_t saturate:1;
> +        uint32_t data_type:1;
> +        uint32_t snapshot:1;
> +        uint32_t pad0:10;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } math_gen5;
> +
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t sampler:4;
> +        uint32_t msg_type:5;
> +        uint32_t simd_mode:2;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } sampler_gen7;
> +
> +      /**
> +       * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
> +       *
> +       * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
> +       **/
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t msg_control:5;
> +        uint32_t msg_type:3;
> +        uint32_t pad0:3;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad1:2;
> +        uint32_t end_of_thread:1;
> +      } gen6_dp_sampler_const_cache;
> +
> +      /*! Data port untyped read / write messages */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t rgba:4;
> +        uint32_t simd_mode:2;
> +        uint32_t msg_type:4;
> +        uint32_t category:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_untyped_rw;
> +
> +      /*! Data port byte scatter / gather */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t simd_mode:1;
> +        uint32_t ignored0:1;
> +        uint32_t data_size:2;
> +        uint32_t ignored1:2;
> +        uint32_t msg_type:4;
> +        uint32_t category:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_byte_rw;
> +
> +      /*! Data port Scratch Read/ write */
> +      struct {
> +        uint32_t offset:12;
> +        uint32_t block_size:2;
> +        uint32_t ignored0:1;
> +        uint32_t invalidate_after_read:1;
> +        uint32_t channel_mode:1;
> +        uint32_t msg_type:1;
> +        uint32_t category:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_scratch_rw;
> +
> +      /*! Data port OBlock read / write */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t block_size:3;
> +        uint32_t ignored:2;
> +        uint32_t invalidate_after_read:1;
> +        uint32_t msg_type:4;
> +        uint32_t category:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_oblock_rw;
> +
> +      /*! Data port dword scatter / gather */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t block_size:2;
> +        uint32_t ignored0:3;
> +        uint32_t invalidate_after_read:1;
> +        uint32_t msg_type:4;
> +        uint32_t ignored1:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad2:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_dword_rw;
> +
> +      /*! Data port typed read / write messages */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t chan_mask:4;
> +        uint32_t pad:1;
> +        uint32_t slot:1;
> +        uint32_t msg_type:4;
> +        uint32_t pad2:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad3:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_typed_rw;
> +
> +      /*! Memory fence */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t pad:5;
> +        uint32_t commit_enable:1;
> +        uint32_t msg_type:4;
> +        uint32_t pad2:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad3:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_memory_fence;
> +
> +      /*! atomic messages */
> +      struct {
> +        uint32_t bti:8;
> +        uint32_t aop_type:4;
> +        uint32_t simd_mode:1;
> +        uint32_t return_data:1;
> +        uint32_t msg_type:4;
> +        uint32_t category:1;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad3:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_atomic_op;
> +
> +      struct {
> +        uint32_t src1_subreg_nr_high:1;
> +        uint32_t src1_reg_nr:8;
> +        uint32_t pad0:1;
> +        uint32_t src2_rep_ctrl:1;
> +        uint32_t src2_swizzle:8;
> +        uint32_t src2_subreg_nr:3;
> +        uint32_t src2_reg_nr:8;
> +        uint32_t pad1:2;
> +      } da3src;
> +
> +      /*! Message gateway */
> +      struct {
> +        uint32_t subfunc:3;
> +        uint32_t pad:11;
> +        uint32_t ackreq:1;
> +        uint32_t notify:2;
> +        uint32_t pad2:2;
> +        uint32_t header_present:1;
> +        uint32_t response_length:5;
> +        uint32_t msg_length:4;
> +        uint32_t pad3:2;
> +        uint32_t end_of_thread:1;
> +      } gen7_msg_gw;
> +
> +      struct {
> +        uint32_t jip:16;
> +        uint32_t uip:16;
> +      } gen7_branch;
> +
> +      int d;
> +      uint32_t ud;
> +      float f;
> +    } bits3;
> +  };
>  };
>  
>  #endif /* __GEN_DEFS_HPP__ */
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index 9df031e..8b5057e 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -51,8 +51,11 @@
>  #include "backend/gen_encoder.hpp"
>  #include <cstring>
>  
> +
>  namespace gbe
>  {
> +  extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split); // callers in this file return early (skip the native encoding) when this yields true
> +  extern bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split); // one-source counterpart; same early-return convention at its call site
>    //////////////////////////////////////////////////////////////////////////
>    // Some helper functions to encode
>    //////////////////////////////////////////////////////////////////////////
> @@ -91,7 +94,7 @@ namespace gbe
>    }
>  
>    static void setMessageDescriptor(GenEncoder *p,
> -                                   GenInstruction *inst,
> +                                   GenNativeInstruction *inst,
>                                     enum GenMessageTarget sfid,
>                                     unsigned msg_length,
>                                     unsigned response_length,
> @@ -107,7 +110,7 @@ namespace gbe
>    }
>  
>    static void setDPUntypedRW(GenEncoder *p,
> -                             GenInstruction *insn,
> +                             GenNativeInstruction *insn,
>                               uint32_t bti,
>                               uint32_t rgba,
>                               uint32_t msg_type,
> @@ -128,7 +131,7 @@ namespace gbe
>    }
>  
>    static void setDPByteScatterGather(GenEncoder *p,
> -                                     GenInstruction *insn,
> +                                     GenNativeInstruction *insn,
>                                       uint32_t bti,
>                                       uint32_t elem_size,
>                                       uint32_t msg_type,
> @@ -149,7 +152,7 @@ namespace gbe
>    }
>  #if 0
>    static void setOBlockRW(GenEncoder *p,
> -                          GenInstruction *insn,
> +                          GenNativeInstruction *insn,
>                            uint32_t bti,
>                            uint32_t size,
>                            uint32_t msg_type,
> @@ -167,7 +170,7 @@ namespace gbe
>  #endif
>  
>    static void setSamplerMessage(GenEncoder *p,
> -                                GenInstruction *insn,
> +                                GenNativeInstruction *insn,
>                                  unsigned char bti,
>                                  unsigned char sampler,
>                                  uint32_t msg_type,
> @@ -187,7 +190,7 @@ namespace gbe
>  
>  
>    static void setTypedWriteMessage(GenEncoder *p,
> -                                   GenInstruction *insn,
> +                                   GenNativeInstruction *insn,
>                                     unsigned char bti,
>                                     unsigned char msg_type,
>                                     uint32_t msg_length,
> @@ -199,7 +202,7 @@ namespace gbe
>       insn->bits3.gen7_typed_rw.msg_type = msg_type;
>    }
>    static void setDWordScatterMessgae(GenEncoder *p,
> -                                     GenInstruction *insn,
> +                                     GenNativeInstruction *insn,
>                                       uint32_t bti,
>                                       uint32_t block_size,
>                                       uint32_t msg_type,
> @@ -238,7 +241,7 @@ namespace gbe
>      curr = stack[--stateNum];
>    }
>  
> -  void GenEncoder::setHeader(GenInstruction *insn) {
> +  void GenEncoder::setHeader(GenNativeInstruction *insn) {
>      if (this->curr.execWidth == 8)
>        insn->header.execution_size = GEN_WIDTH_8;
>      else if (this->curr.execWidth == 16)
> @@ -260,7 +263,7 @@ namespace gbe
>      insn->header.saturate = this->curr.saturate;
>    }
>  
> -  void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) {
> +  void GenEncoder::setDst(GenNativeInstruction *insn, GenRegister dest) {
>       if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
>          assert(dest.nr < 128);
>  
> @@ -274,7 +277,7 @@ namespace gbe
>       insn->bits1.da1.dest_horiz_stride = dest.hstride;
>    }
>  
> -  void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) {
> +  void GenEncoder::setSrc0(GenNativeInstruction *insn, GenRegister reg) {
>       if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
>          assert(reg.nr < 128);
>  
> @@ -327,7 +330,7 @@ namespace gbe
>      }
>    }
>  
> -  void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
> +  void GenEncoder::setSrc1(GenNativeInstruction *insn, GenRegister reg) {
>       assert(reg.nr < 128);
>       assert(reg.file != GEN_ARCHITECTURE_REGISTER_FILE || reg.nr == 0);
>  
> @@ -442,7 +445,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      assert(elemNum >= 1 || elemNum <= 4);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
> @@ -469,7 +472,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      assert(elemNum >= 1 || elemNum <= 4);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
> @@ -495,7 +498,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
>      if (this->curr.execWidth == 8) {
> @@ -521,7 +524,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
>      this->setHeader(insn);
> @@ -545,7 +548,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
>      uint32_t block_size = 0;
> @@ -575,7 +578,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      uint32_t msg_length = 0;
>      uint32_t response_length = 0;
>  
> @@ -608,13 +611,21 @@ namespace gbe
>        NOT_SUPPORTED;
>  
>    }
> +  GenCompactInstruction *GenEncoder::nextCompact(uint32_t opcode) { // appends one zero-filled compact instruction carrying `opcode` to store
> +    GenCompactInstruction insn;
> +    std::memset(&insn, 0, sizeof(GenCompactInstruction));
> +    insn.bits1.opcode = opcode;
> +    this->store.push_back(insn.low); // a compact instruction occupies exactly one store element
> +    return (GenCompactInstruction *)&this->store.back(); // points at the element just pushed; NOTE(review): presumably invalid after a later push_back reallocates - confirm
> +  }
>  
> -  GenInstruction *GenEncoder::next(uint32_t opcode) {
> -     GenInstruction insn;
> -     std::memset(&insn, 0, sizeof(GenInstruction));
> +  GenNativeInstruction *GenEncoder::next(uint32_t opcode) { // appends one zero-filled native instruction carrying `opcode` to store
> +     GenNativeInstruction insn;
> +     std::memset(&insn, 0, sizeof(GenNativeInstruction));
>       insn.header.opcode = opcode;
> -     this->store.push_back(insn);
> -     return &this->store.back();
> +     this->store.push_back(insn.low); // a native instruction takes two store elements: low half first...
> +     this->store.push_back(insn.high); // ...then the high half
> +     return (GenNativeInstruction *)(&this->store.back()-1); // back() is the high half, so step back one element to the low half; assumes both elements are adjacent in memory - TODO confirm store is contiguous
>    }
>  
>    INLINE void _handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
> @@ -622,7 +633,7 @@ namespace gbe
>         int w = p->curr.execWidth;
>         p->push();
>         p->curr.nibControl = 0;
> -       GenInstruction *insn = p->next(opcode);
> +       GenNativeInstruction *insn = p->next(opcode);
>         p->setHeader(insn);
>         p->setDst(insn, dst);
>         p->setSrc0(insn, src0);
> @@ -678,7 +689,9 @@ namespace gbe
>         }
>         p->pop();
>       } else if (needToSplitAlu1(p, dst, src) == false) {
> -       GenInstruction *insn = p->next(opcode);
> +      if(compactAlu1(p, opcode, dst, src, condition, false))
> +        return;
> +       GenNativeInstruction *insn = p->next(opcode);
>         if (condition != 0) {
>           GBE_ASSERT(opcode == GEN_OPCODE_MOV ||
>                      opcode == GEN_OPCODE_NOT);
> @@ -688,7 +701,7 @@ namespace gbe
>         p->setDst(insn, dst);
>         p->setSrc0(insn, src);
>       } else {
> -       GenInstruction *insnQ1, *insnQ2;
> +       GenNativeInstruction *insnQ1, *insnQ2;
>  
>         // Instruction for the first quarter
>         insnQ1 = p->next(opcode);
> @@ -718,7 +731,9 @@ namespace gbe
>      if (dst.isdf() && src0.isdf() && src1.isdf()) {
>         handleDouble(p, opcode, dst, src0, src1);
>      } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
> -       GenInstruction *insn = p->next(opcode);
> +       if(compactAlu2(p, opcode, dst, src0, src1, condition, false))
> +         return;
> +       GenNativeInstruction *insn = p->next(opcode);
>         if (condition != 0) {
>           GBE_ASSERT(opcode == GEN_OPCODE_OR ||
>                      opcode == GEN_OPCODE_XOR ||
> @@ -730,7 +745,7 @@ namespace gbe
>         p->setSrc0(insn, src0);
>         p->setSrc1(insn, src1);
>      } else {
> -       GenInstruction *insnQ1, *insnQ2;
> +       GenNativeInstruction *insnQ1, *insnQ2;
>  
>         // Instruction for the first quarter
>         insnQ1 = p->next(opcode);
> @@ -754,14 +769,14 @@ namespace gbe
>  
>  #define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6))
>  
> -  static GenInstruction *alu3(GenEncoder *p,
> +  static GenNativeInstruction *alu3(GenEncoder *p,
>                                uint32_t opcode,
>                                GenRegister dest,
>                                GenRegister src0,
>                                GenRegister src1,
>                                GenRegister src2)
>    {
> -     GenInstruction *insn = p->next(opcode);
> +     GenNativeInstruction *insn = p->next(opcode);
>  
>       assert(dest.file == GEN_GENERAL_REGISTER_FILE);
>       assert(dest.nr < 128);
> @@ -811,7 +826,7 @@ namespace gbe
>  
>       // Emit second half of the instruction
>       if (p->curr.execWidth == 16) {
> -      GenInstruction q1Insn = *insn;
> +      GenNativeInstruction q1Insn = *insn;
>        insn = p->next(opcode);
>        *insn = q1Insn;
>        insn->header.quarter_control = GEN_COMPRESSION_Q2;
> @@ -1048,14 +1063,14 @@ namespace gbe
>  
>  
>    void GenEncoder::NOP(void) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_NOP);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_NOP); // always emitted through next(), i.e. as a full native instruction
>      this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
>      this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
>      this->setSrc1(insn, GenRegister::immud(0x0));
>    }
>  
>    void GenEncoder::BARRIER(GenRegister src) {
> -     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>       this->setHeader(insn);
>       this->setDst(insn, GenRegister::null());
>       this->setSrc0(insn, src);
> @@ -1064,7 +1079,7 @@ namespace gbe
>       insn->bits3.msg_gateway.notify = 0x1;
>    }
>    void GenEncoder::FENCE(GenRegister dst) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      this->setHeader(insn);
>      this->setDst(insn, dst);
>      this->setSrc0(insn, dst);
> @@ -1090,7 +1105,7 @@ namespace gbe
>    ALU2_BRA(BRC)
>  
>    void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
> -    GenInstruction &insn = this->store[insnID];
> +    GenNativeInstruction &insn = *(GenNativeInstruction *)&this->store[insnID];
>      GBE_ASSERT(insnID < this->store.size());
>      GBE_ASSERT(insn.header.opcode == GEN_OPCODE_JMPI ||
>                 insn.header.opcode == GEN_OPCODE_BRD  ||
> @@ -1118,7 +1133,7 @@ namespace gbe
>        // for all the branching instruction. And need to adjust the distance
>        // for those branch instruction's start point and end point contains
>        // this instruction.
> -      GenInstruction &insn2 = this->store[insnID+1];
> +      GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
>        GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
>        insn.header.opcode = GEN_OPCODE_ADD;
>        this->setDst(&insn, GenRegister::ip());
> @@ -1127,7 +1142,7 @@ namespace gbe
>      } else {
>        insn.header.predicate_inverse ^= 1;
>        this->setSrc1(&insn, GenRegister::immd(2));
> -      GenInstruction &insn2 = this->store[insnID+1];
> +      GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
>        GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
>        GBE_ASSERT(insnID < this->store.size());
>        insn2.header.predicate_control = GEN_PREDICATE_NONE;
> @@ -1140,7 +1155,10 @@ namespace gbe
>  
>    void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
>      if (needToSplitCmp(this, src0, src1) == false) {
> -      GenInstruction *insn = this->next(GEN_OPCODE_CMP);
> +      if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
> +        return;
> +      }
> +      GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
>        this->setHeader(insn);
>        insn->header.destreg_or_condmod = conditional;
>        insn->header.thread_control = GEN_THREAD_SWITCH;
> @@ -1148,7 +1166,7 @@ namespace gbe
>        this->setSrc0(insn, src0);
>        this->setSrc1(insn, src1);
>      } else {
> -      GenInstruction *insnQ1, *insnQ2;
> +      GenNativeInstruction *insnQ1, *insnQ2;
>  
>        // Instruction for the first quarter
>        insnQ1 = this->next(GEN_OPCODE_CMP);
> @@ -1177,7 +1195,7 @@ namespace gbe
>                             GenRegister src0,
>                             GenRegister src1)
>    {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEL);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEL);
>      GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
>      this->setHeader(insn);
>      insn->header.destreg_or_condmod = conditional;
> @@ -1187,7 +1205,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::WAIT(void) {
> -     GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
>       GenRegister src = GenRegister::notification1();
>       this->setDst(insn, GenRegister::null());
>       this->setSrc0(insn, src);
> @@ -1198,7 +1216,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) {
> -     GenInstruction *insn = this->next(GEN_OPCODE_MATH);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
>       assert(dst.file == GEN_GENERAL_REGISTER_FILE);
>       assert(src0.file == GEN_GENERAL_REGISTER_FILE);
>       assert(src1.file == GEN_GENERAL_REGISTER_FILE);
> @@ -1226,7 +1244,7 @@ namespace gbe
>          insn->header.quarter_control = GEN_COMPRESSION_Q1;
>  
>          if(this->curr.execWidth == 16) {
> -          GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
> +          GenNativeInstruction *insn2 = this->next(GEN_OPCODE_MATH);
>            GenRegister new_dest, new_src0, new_src1;
>            new_dest = GenRegister::QnPhysical(dst, 1);
>            new_src0 = GenRegister::QnPhysical(src0, 1);
> @@ -1244,7 +1262,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
> -     GenInstruction *insn = this->next(GEN_OPCODE_MATH);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
>       assert(dst.file == GEN_GENERAL_REGISTER_FILE);
>       assert(src.file == GEN_GENERAL_REGISTER_FILE);
>       assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
> @@ -1275,7 +1293,7 @@ namespace gbe
>         msg_length++;
>       uint32_t simd_mode = (simdWidth == 16) ?
>                              GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
> -     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>       this->setHeader(insn);
>       this->setDst(insn, dest);
>       this->setSrc0(insn, msg);
> @@ -1287,7 +1305,7 @@ namespace gbe
>  
>    void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
>    {
> -     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>       uint32_t msg_type = GEN_TYPED_WRITE;
>       uint32_t msg_length = header_present ? 9 : 8;
>       this->setHeader(insn);
> @@ -1296,7 +1314,7 @@ namespace gbe
>       setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
>    }
>    static void setScratchMessage(GenEncoder *p,
> -                                   GenInstruction *insn,
> +                                   GenNativeInstruction *insn,
>                                     uint32_t offset,
>                                     uint32_t block_size,
>                                     uint32_t channel_mode,
> @@ -1317,7 +1335,7 @@ namespace gbe
>    {
>       assert(src_num == 1 || src_num ==2);
>       uint32_t block_size = src_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
> -     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>       this->setHeader(insn);
>       this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
>       this->setSrc0(insn, msg);
> @@ -1330,7 +1348,7 @@ namespace gbe
>    {
>       assert(dst_num == 1 || dst_num ==2);
>       uint32_t block_size = dst_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
> -     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>       this->setHeader(insn);
>       this->setDst(insn, dst);
>       this->setSrc0(insn, src);
> @@ -1340,7 +1358,7 @@ namespace gbe
>    }
>  
>    void GenEncoder::EOT(uint32_t msg) {
> -    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
> +    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
>      this->setSrc0(insn, GenRegister::ud8grf(msg,0));
>      this->setSrc1(insn, GenRegister::immud(0));
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 50662fb..4c65a9c 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -202,11 +202,12 @@ namespace gbe
>      ////////////////////////////////////////////////////////////////////////
>      // Helper functions to encode
>      ////////////////////////////////////////////////////////////////////////
> -    void setHeader(GenInstruction *insn);
> -    void setDst(GenInstruction *insn, GenRegister dest);
> -    void setSrc0(GenInstruction *insn, GenRegister reg);
> -    void setSrc1(GenInstruction *insn, GenRegister reg);
> -    GenInstruction *next(uint32_t opcode);
> +    void setHeader(GenNativeInstruction *insn);
> +    void setDst(GenNativeInstruction *insn, GenRegister dest);
> +    void setSrc0(GenNativeInstruction *insn, GenRegister reg);
> +    void setSrc1(GenNativeInstruction *insn, GenRegister reg);
> +    GenCompactInstruction *nextCompact(uint32_t opcode); //!< Append a zeroed compact instruction (one store element) and return it
> +    GenNativeInstruction *next(uint32_t opcode); //!< Append a zeroed native instruction (two store elements) and return it
>      uint32_t n_instruction(void) const { return store.size(); }
>      GBE_CLASS(GenEncoder); //!< Use custom allocators
>    };
> diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
> new file mode 100644
> index 0000000..5b0a897
> --- /dev/null
> +++ b/backend/src/backend/gen_insn_compact.cpp
> @@ -0,0 +1,521 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Ruiling Song <ruiling.song at intel.com>
> + */
> +#include "backend/gen_defs.hpp"
> +#include "backend/gen_encoder.hpp"
> +#include <cstring>
> +
> +namespace gbe {
> +
> +  struct compact_table_entry { // one row of the lookup tables defined below in this file
> +    uint32_t bit_pattern; // bit pattern being matched; presumably the uncompacted field bits - confirm against the lookup code
> +    uint32_t index; // table index paired with that pattern (0..31 in the tables below)
> +  };
> +
> +  static compact_table_entry control_table[] = { // 32 rows of 19-digit patterns; rows are in ascending bit_pattern order and index runs 0..31
> +    {0b0000000000000000010, 0},
> +    {0b0000100000000000000, 1},
> +    {0b0000100000000000001, 2},
> +    {0b0000100000000000010, 3},
> +    {0b0000100000000000011, 4},
> +    {0b0000100000000000100, 5},
> +    {0b0000100000000000101, 6},
> +    {0b0000100000000000111, 7},
> +    {0b0000100000000001000, 8},
> +    {0b0000100000000001001, 9},
> +    {0b0000100000000001101, 10},
> +    {0b0000110000000000000, 11},
> +    {0b0000110000000000001, 12},
> +    {0b0000110000000000010, 13},
> +    {0b0000110000000000011, 14},
> +    {0b0000110000000000100, 15},
> +    {0b0000110000000000101, 16},
> +    {0b0000110000000000111, 17},
> +    {0b0000110000000001001, 18},
> +    {0b0000110000000001101, 19},
> +    {0b0000110000000010000, 20},
> +    {0b0000110000100000000, 21},
> +    {0b0001000000000000000, 22},
> +    {0b0001000000000000010, 23},
> +    {0b0001000000000000100, 24},
> +    {0b0001000000100000000, 25},
> +    {0b0010110000000000000, 26},
> +    {0b0010110000000010000, 27},
> +    {0b0011000000000000000, 28},
> +    {0b0011000000100000000, 29},
> +    {0b0101000000000000000, 30},
> +    {0b0101000000100000000, 31},
> +  };
> +
> +  static compact_table_entry data_type_table[] = { // 32 rows of 18-digit patterns in ascending bit_pattern order; index is NOT monotonic here (same pairs as data_type_decompact)
> +    {0b000000001000001100, 20},
> +    {0b001000000000000001, 0},
> +    {0b001000000000100000, 1},
> +    {0b001000000000100001, 2},
> +    {0b001000000000111101, 21},
> +    {0b001000000001100001, 3},
> +    {0b001000000010100101, 22},
> +    {0b001000000010111101, 4},
> +    {0b001000001011111101, 5},
> +    {0b001000001110100001, 6},
> +    {0b001000001110100101, 7},
> +    {0b001000001110111101, 8},
> +    {0b001000010000100000, 23},
> +    {0b001000010000100001, 9},
> +    {0b001000110000100000, 10},
> +    {0b001000110000100001, 11},
> +    {0b001001010010100100, 24},
> +    {0b001001010010100101, 12},
> +    {0b001001110010000100, 25},
> +    {0b001001110010100100, 13},
> +    {0b001001110010100101, 14},
> +    {0b001010010100001001, 26},
> +    {0b001010010100101000, 30},
> +    {0b001010110100101000, 31},
> +    {0b001011110110101100, 29},
> +    {0b001101111110111101, 27},
> +    {0b001111001110111101, 15},
> +    {0b001111011110011101, 16},
> +    {0b001111011110111100, 17},
> +    {0b001111011110111101, 18},
> +    {0b001111111110111100, 19},
> +    {0b001111111110111101, 28},
> +  };
> +
> +  static compact_table_entry data_type_decompact[] = { // same 32 (pattern, index) pairs as data_type_table, but ordered by index 0..31 so row i holds index i
> +    {0b001000000000000001, 0},
> +    {0b001000000000100000, 1},
> +    {0b001000000000100001, 2},
> +    {0b001000000001100001, 3},
> +    {0b001000000010111101, 4},
> +    {0b001000001011111101, 5},
> +    {0b001000001110100001, 6},
> +    {0b001000001110100101, 7},
> +    {0b001000001110111101, 8},
> +    {0b001000010000100001, 9},
> +    {0b001000110000100000, 10},
> +    {0b001000110000100001, 11},
> +    {0b001001010010100101, 12},
> +    {0b001001110010100100, 13},
> +    {0b001001110010100101, 14},
> +    {0b001111001110111101, 15},
> +    {0b001111011110011101, 16},
> +    {0b001111011110111100, 17},
> +    {0b001111011110111101, 18},
> +    {0b001111111110111100, 19},
> +    {0b000000001000001100, 20},
> +    {0b001000000000111101, 21},
> +    {0b001000000010100101, 22},
> +    {0b001000010000100000, 23},
> +    {0b001001010010100100, 24},
> +    {0b001001110010000100, 25},
> +    {0b001010010100001001, 26},
> +    {0b001101111110111101, 27},
> +    {0b001111111110111101, 28},
> +    {0b001011110110101100, 29},
> +    {0b001010010100101000, 30},
> +    {0b001010110100101000, 31},
> +  };
> +
> +  static compact_table_entry subreg_table[] = { // {bit_pattern, index}; ascending in both columns, so it serves bsearch() in compactSubRegBits() and direct subscripting in decompactInstruction()
> +    {0b000000000000000, 0},
> +    {0b000000000000001, 1},
> +    {0b000000000001000, 2},
> +    {0b000000000001111, 3},
> +    {0b000000000010000, 4},
> +    {0b000000010000000, 5},
> +    {0b000000100000000, 6},
> +    {0b000000110000000, 7},
> +    {0b000001000000000, 8},
> +    {0b000001000010000, 9},
> +    {0b000001010000000, 10},
> +    {0b001000000000000, 11},
> +    {0b001000000000001, 12},
> +    {0b001000010000001, 13},
> +    {0b001000010000010, 14},
> +    {0b001000010000011, 15},
> +    {0b001000010000100, 16},
> +    {0b001000010000111, 17},
> +    {0b001000010001000, 18},
> +    {0b001000010001110, 19},
> +    {0b001000010001111, 20},
> +    {0b001000110000000, 21},
> +    {0b001000111101000, 22},
> +    {0b010000000000000, 23},
> +    {0b010000110000000, 24},
> +    {0b011000000000000, 25},
> +    {0b011110010000111, 26},
> +    {0b100000000000000, 27},
> +    {0b101000000000000, 28},
> +    {0b110000000000000, 29},
> +    {0b111000000000000, 30},
> +    {0b111000000011100, 31},
> +  }; // each pattern is decoded through union SubRegBits
> +
> +  static compact_table_entry srcreg_table[] = { // {bit_pattern, index}; ascending in both columns, so it serves bsearch() in compactSrcRegBits() and direct subscripting in decompactInstruction()
> +    {0b000000000000, 0},
> +    {0b000000000010, 1},
> +    {0b000000010000, 2},
> +    {0b000000010010, 3},
> +    {0b000000011000, 4},
> +    {0b000000100000, 5},
> +    {0b000000101000, 6},
> +    {0b000001001000, 7},
> +    {0b000001010000, 8},
> +    {0b000001110000, 9},
> +    {0b000001111000, 10},
> +    {0b001100000000, 11},
> +    {0b001100000010, 12},
> +    {0b001100001000, 13},
> +    {0b001100010000, 14},
> +    {0b001100010010, 15},
> +    {0b001100100000, 16},
> +    {0b001100101000, 17},
> +    {0b001100111000, 18},
> +    {0b001101000000, 19},
> +    {0b001101000010, 20},
> +    {0b001101001000, 21},
> +    {0b001101010000, 22},
> +    {0b001101100000, 23},
> +    {0b001101101000, 24},
> +    {0b001101110000, 25},
> +    {0b001101110001, 26},
> +    {0b001101111000, 27},
> +    {0b010001101000, 28},
> +    {0b010001101001, 29},
> +    {0b010001101010, 30},
> +    {0b010110001000, 31},
> +  }; // each pattern is decoded through union SrcRegBits; the same table is used for src0 and src1
> +
> +  static int cmp_key(const void *p1, const void*p2) { // bsearch() comparator: orders two compact_table_entry objects by bit_pattern (<0, 0, >0)
> +    const compact_table_entry * px = (const compact_table_entry *)p1; // keep the const qualifier through the cast
> +    const compact_table_entry * py = (const compact_table_entry *)p2;
> +    return (px->bit_pattern > py->bit_pattern) - (px->bit_pattern < py->bit_pattern); // explicit three-way compare; returning a difference can wrap or overflow int
> +  }
> +  union ControlBits{ // field view of a control_table bit_pattern; 'data' is the same value as one 32-bit word
> +    struct {
> +      uint32_t access_mode:1;
> +      uint32_t mask_control:1;
> +      uint32_t dependency_control:2;
> +      uint32_t quarter_control:2;
> +      uint32_t thread_control:2;
> +      uint32_t predicate_control:4;
> +      uint32_t predicate_inverse:1;
> +      uint32_t execution_size:3;
> +      uint32_t saturate:1;
> +      uint32_t flag_sub_reg_nr:1;
> +      uint32_t flag_reg_nr:1;
> +      uint32_t pad:13; // 19 named bits + 13 = 32, so the struct overlays 'data' exactly (pad:23 made it spill into a second 32-bit unit)
> +    };
> +    uint32_t data; // written with 0 or a table pattern before the fields are used
> +  };
> +  union DataTypeBits{ // field view of a data_type_table / data_type_decompact bit_pattern; 'data' is the same value as one 32-bit word
> +    struct {
> +      uint32_t dest_reg_file:2;
> +      uint32_t dest_reg_type:3;
> +      uint32_t src0_reg_file:2;
> +      uint32_t src0_reg_type:3;
> +      uint32_t src1_reg_file:2;
> +      uint32_t src1_reg_type:3;
> +      uint32_t dest_horiz_stride:2;
> +      uint32_t dest_address_mode:1;
> +      uint32_t pad:14; // 18 named bits + 14 = 32
> +    };
> +    uint32_t data;
> +  };
> +  union SubRegBits { // field view of a subreg_table bit_pattern; 'data' is the same value as one 32-bit word
> +    struct {
> +      uint32_t dest_subreg_nr:5;
> +      uint32_t src0_subreg_nr:5;
> +      uint32_t src1_subreg_nr:5;
> +      uint32_t pad:17; // 15 named bits + 17 = 32
> +    };
> +    uint32_t data;
> +  };
> +  union SrcRegBits { // field view of a srcreg_table bit_pattern (one source operand); 'data' is the same value as one 32-bit word
> +    struct {
> +      uint32_t src_abs:1;
> +      uint32_t src_negate:1;
> +      uint32_t src_address_mode:1;
> +      uint32_t src_horiz_stride:2;
> +      uint32_t src_width:3;
> +      uint32_t src_vert_stride:4;
> +      uint32_t pad:20; // 12 named bits + 20 = 32
> +    };
> +    uint32_t data;
> +  };
> +
> +  void decompactInstruction(GenCompactInstruction * p, GenNativeInstruction *pOut) {
> +    // Expand compact instruction *p into native instruction *pOut by resolving each table index stored in p; *pOut is fully overwritten.
> +    memset(pOut, 0, sizeof(GenNativeInstruction)); // start from all zero: the fields below are assigned or OR-ed in
> +    union ControlBits control_bits;
> +    control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern;
> +    pOut->low.low = (uint32_t)p->bits1.opcode | ((control_bits.data & 0xffff) << 8); // opcode in the low bits, low 16 bits of the control pattern from bit 8 upward
> +    pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod;
> +    pOut->header.saturate = control_bits.saturate; // saturate and the two flag fields are not taken from the 0xffff-masked word; they are copied individually (here and below)
> +    pOut->header.acc_wr_control = p->bits1.acc_wr_control;
> +    pOut->header.cmpt_control = p->bits1.cmpt_control;
> +    pOut->header.debug_control = p->bits1.debug_control;
> +
> +    union DataTypeBits data_type_bits;
> +    union SubRegBits subreg_bits;
> +    union SrcRegBits src0_bits;
> +    data_type_bits.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern; // index-ordered table, not the bit_pattern-sorted data_type_table
> +    subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern;
> +    src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern; // src0 index is stored split: low 2 bits in bits1, the remaining bits in bits2
> +
> +    pOut->low.high |= data_type_bits.data & 0x7fff; // low 15 bits of the pattern are OR-ed in as one chunk
> +    pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride; // the remaining DataTypeBits fields are copied one by one
> +    pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode;
> +    pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr;
> +    pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr;
> +
> +    pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr;
> +    pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr;
> +    pOut->high.low |= (src0_bits.data << 13); // the src0 SrcRegBits pattern is OR-ed in from bit 13 upward
> +    pOut->bits2.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr;
> +    pOut->bits2.da1.flag_reg_nr = control_bits.flag_reg_nr;
> +
> +    if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) { // immediate src1: src1_index holds the upper immediate bits instead of a table index
> +      uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8); // low 8 bits from src1_reg_nr, the rest from src1_index
> +      pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm; // sign-extend from bit 12 to 32 bits
> +    } else {
> +      union SrcRegBits src1_bits;
> +      src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern;
> +      pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr;
> +      pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr;
> +      pOut->high.high |= (src1_bits.data << 13); // same placement as src0, in the next 32-bit word
> +    }
> +  }
> +
> +  int compactControlBits(GenEncoder *p, uint32_t quarter, uint32_t execWidth) {
> +    // Build a ControlBits word from the encoder's current state and look it up in control_table; returns the entry's index, or -1 if the state cannot be compacted.
> +    const GenInstructionState *s = &p->curr;
> +    // quick rejections before building the lookup key
> +    if(s->nibControl != 0)
> +      return -1;
> +    if(s->predicate > GEN_PREDICATE_NORMAL)
> +      return -1;
> +    if(s->flag == 1) // instructions using flag register 1 are never compacted
> +      return -1;
> +
> +    ControlBits b;
> +    b.data = 0; // fields not assigned below (access_mode, dependency_control, thread_control) stay 0
> +
> +    if (execWidth == 8)
> +      b.execution_size = GEN_WIDTH_8;
> +    else if (execWidth == 16)
> +      b.execution_size = GEN_WIDTH_16;
> +    else if (execWidth == 1)
> +      b.execution_size = GEN_WIDTH_1;
> +    else
> +      NOT_IMPLEMENTED; // only execution widths 1, 8 and 16 are handled
> +
> +    b.mask_control = s->noMask;
> +    b.quarter_control = quarter;
> +    b.predicate_control = s->predicate;
> +    b.predicate_inverse = s->inversePredicate;
> +
> +    b.saturate = s->saturate;
> +    b.flag_sub_reg_nr = s->subFlag;
> +    b.flag_reg_nr = s->flag;
> +
> +    compact_table_entry key;
> +    key.bit_pattern = b.data; // only bit_pattern is compared by cmp_key; key.index is left unset
> +
> +    compact_table_entry *r = (compact_table_entry *)bsearch(&key, control_table, // bsearch() relies on control_table being sorted by bit_pattern
> +      sizeof(control_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> +    if (r == NULL)
> +      return -1;
> +    return r->index;
> +  }
> +
> +  int compactDataTypeBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
> +    // Look up the file/type combination of dst, src0 and optional src1 (may be NULL) in data_type_table; returns the entry's index, or -1 if it cannot be compacted. p is not used.
> +    // compact does not support any indirect access
> +    if(dst->address_mode != GEN_ADDRESS_DIRECT)
> +      return -1;
> +
> +    if(src0->file == GEN_IMMEDIATE_VALUE) // an immediate src0 is never compacted
> +      return -1;
> +
> +    DataTypeBits b;
> +    b.data = 0;
> +
> +    b.dest_horiz_stride = dst->hstride == GEN_HORIZONTAL_STRIDE_0 ? GEN_HORIZONTAL_STRIDE_1 : dst->hstride; // a stride-0 destination is looked up as stride 1
> +    b.dest_address_mode = dst->address_mode;
> +    b.dest_reg_file = dst->file;
> +    b.dest_reg_type = dst->type;
> +
> +    b.src0_reg_file = src0->file;
> +    b.src0_reg_type = src0->type;
> +
> +    if(src1) {
> +      b.src1_reg_type = src1->type;
> +      b.src1_reg_file = src1->file;
> +    } else {
> +      // no second source: both src1 fields default to zero
> +      b.src1_reg_type = 0;
> +      b.src1_reg_file = 0;
> +    }
> +
> +    compact_table_entry key;
> +    key.bit_pattern = b.data; // only bit_pattern is compared by cmp_key
> +
> +    compact_table_entry *r = (compact_table_entry *)bsearch(&key, data_type_table, // the bit_pattern-sorted table, not data_type_decompact
> +                             sizeof(data_type_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> +    if (r == NULL)
> +      return -1;
> +    return r->index;
> +  }
> +  int compactSubRegBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) { // returns the subreg_table index for the three sub-register numbers, or -1 if the combination is not in the table; src1 may be NULL, p is not used
> +    SubRegBits b;
> +    b.data = 0;
> +    b.dest_subreg_nr = dst->subnr;
> +    b.src0_subreg_nr = src0->subnr;
> +    if(src1)
> +      b.src1_subreg_nr = src1->subnr;
> +    else
> +      b.src1_subreg_nr = 0; // a missing src1 is looked up as sub-register 0
> +
> +    compact_table_entry key;
> +    key.bit_pattern = b.data; // only bit_pattern is compared by cmp_key
> +
> +    compact_table_entry *r = (compact_table_entry *)bsearch(&key, subreg_table,
> +                sizeof(subreg_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> +    if (r == NULL)
> +      return -1;
> +    return r->index;
> +  }
> +  int compactSrcRegBits(GenEncoder *p, GenRegister *src) { // returns the srcreg_table index describing src's modifiers and region, or -1 if src cannot be compacted
> +    // As we only use GEN_ALIGN_1 and compact only supports direct register access,
> +    // we only need to verify [hstride, width, vstride] (plus the abs/negate modifiers set below)
> +    if(src->file == GEN_IMMEDIATE_VALUE)
> +      return -1;
> +    if(src->address_mode != GEN_ADDRESS_DIRECT)
> +      return -1;
> +
> +    SrcRegBits b;
> +    b.data = 0;
> +    b.src_abs = src->absolute;
> +    b.src_negate = src->negation;
> +    b.src_address_mode = src->address_mode;
> +    if(p->curr.execWidth == 1 && src->width == GEN_WIDTH_1) { // scalar case: looked up with zero strides regardless of src's own hstride/vstride
> +      b.src_width = src->width;
> +      b.src_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
> +      b.src_vert_stride = GEN_VERTICAL_STRIDE_0;
> +    }
> +    else {
> +      b.src_horiz_stride = src->hstride;
> +      b.src_width = src->width;
> +      b.src_vert_stride = src->vstride;
> +    }
> +    compact_table_entry key;
> +    key.bit_pattern = b.data; // only bit_pattern is compared by cmp_key
> +
> +    compact_table_entry *r = (compact_table_entry *)bsearch(&key, srcreg_table,
> +                    sizeof(srcreg_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key);
> +    if (r == NULL)
> +      return -1;
> +    return r->index;
> +  }
> +
> +  bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split) { // tries to emit a one-source instruction in compact form; returns false, emitting nothing, when any table lookup fails
> +    if(split) {
> +      // TODO support it
> +      return false;
> +    } else {
> +      int control_index = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
> +      if(control_index == -1) return false;
> +
> +      int data_type_index = compactDataTypeBits(p, &dst, &src, NULL);
> +      if(data_type_index == -1) return false;
> +
> +      int sub_reg_index = compactSubRegBits(p, &dst, &src, NULL);
> +      if(sub_reg_index == -1) return false;
> +
> +      int src_reg_index = compactSrcRegBits(p, &src);
> +      if(src_reg_index == -1) return false;
> +
> +      GenCompactInstruction * insn = p->nextCompact(opcode); // requested only after every lookup has succeeded
> +      insn->bits1.control_index = control_index;
> +      insn->bits1.data_type_index = data_type_index;
> +      insn->bits1.sub_reg_index = sub_reg_index;
> +      insn->bits1.acc_wr_control = p->curr.accWrEnable;
> +      insn->bits1.destreg_or_condmod = condition;
> +      insn->bits1.cmpt_control = 1; // presumably the bit that marks the encoding as compact - confirm against the hardware documentation
> +      insn->bits1.src0_index_lo = src_reg_index & 3; // src0 index is stored split: low 2 bits here, the rest in bits2
> +
> +      insn->bits2.src0_index_hi = src_reg_index >> 2;
> +      insn->bits2.src1_index = 0; // no second source: src1 fields are zeroed
> +      insn->bits2.dest_reg_nr = dst.nr;
> +      insn->bits2.src0_reg_nr = src.nr;
> +      insn->bits2.src1_reg_nr = 0;
> +      return true;
> +    }
> +  }
> +
> +  bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split) { // tries to emit a two-source instruction in compact form; returns false, emitting nothing, when it cannot be compacted
> +    if(split) {
> +      // TODO support it
> +      return false;
> +    } else {
> +      if(opcode == GEN_OPCODE_IF  || opcode == GEN_OPCODE_ENDIF || opcode == GEN_OPCODE_JMPI) return false; // these control-flow opcodes are never compacted
> +
> +      int control_index = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
> +      if(control_index == -1) return false;
> +
> +      int data_type_index = compactDataTypeBits(p, &dst, &src0, &src1);
> +      if(data_type_index == -1) return false;
> +
> +      int sub_reg_index = compactSubRegBits(p, &dst, &src0, &src1);
> +      if(sub_reg_index == -1) return false;
> +
> +      int src0_reg_index = compactSrcRegBits(p, &src0);
> +      if(src0_reg_index == -1) return false;
> +
> +      bool src1_imm = false;
> +      int src1_reg_index; // assigned and read only on the non-immediate path
> +      if(src1.file == GEN_IMMEDIATE_VALUE) {
> +        if(src1.absolute != 0 || src1.negation != 0 || src1.type == GEN_TYPE_F) // float immediates and source modifiers on an immediate are not compacted
> +          return false;
> +        if(src1.value.d < -4096 || src1.value.d > 4095) // 13bit signed imm
> +          return false;
> +        src1_imm = true;
> +      } else {
> +        src1_reg_index = compactSrcRegBits(p, &src1);
> +        if(src1_reg_index == -1) return false;
> +      }
> +      GenCompactInstruction * insn = p->nextCompact(opcode); // requested only after every check has succeeded
> +      insn->bits1.control_index = control_index;
> +      insn->bits1.data_type_index = data_type_index;
> +      insn->bits1.sub_reg_index = sub_reg_index;
> +      insn->bits1.acc_wr_control = p->curr.accWrEnable;
> +      insn->bits1.destreg_or_condmod = condition;
> +      insn->bits1.cmpt_control = 1; // presumably the bit that marks the encoding as compact - confirm against the hardware documentation
> +      insn->bits1.src0_index_lo = src0_reg_index & 3; // src0 index is stored split: low 2 bits here, the rest in bits2
> +
> +      insn->bits2.src0_index_hi = src0_reg_index >> 2;
> +      insn->bits2.src1_index = src1_imm ? (src1.value.ud & 8191)>> 8 : src1_reg_index; // immediate: bits 12..8 of the value; otherwise the srcreg_table index
> +      insn->bits2.dest_reg_nr = dst.nr;
> +      insn->bits2.src0_reg_nr = src0.nr;
> +      insn->bits2.src1_reg_nr = src1_imm ? (src1.value.ud & 0xff): src1.nr; // immediate: bits 7..0 of the value; otherwise the register number
> +      return true;
> +    }
> +  }
> +};
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index d0e3d0b..e04a2c2 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3089,7 +3089,7 @@ namespace gbe
>        sel.push();
>          sel.curr.noMask = 1;
>          sel.curr.predicate = GEN_PREDICATE_NONE;
> -        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1);
> +        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
>        sel.pop();
>  
>        if (sel.block->hasBarrier) {
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index 937f5b2..0794d48 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -524,7 +524,7 @@ namespace gbe
>                cmp0->state.subFlag = insn.state.subFlag;
>                cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
>                cmp0->src(1) = GenRegister::immuw(0);
> -              cmp0->dst(0) = GenRegister::null();
> +              cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
>                cmp0->extra.function = GEN_CONDITIONAL_NEQ;
>                insn.prepend(*cmp0);
>                validatedFlags.insert(insn.state.flagIndex);
> @@ -545,7 +545,7 @@ namespace gbe
>                cmp0->state.subFlag = insn.state.subFlag;
>                cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
>                cmp0->src(1) = GenRegister::immuw(0);
> -              cmp0->dst(0) = GenRegister::null();
> +              cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
>                cmp0->extra.function = GEN_CONDITIONAL_NEQ;
>                insn.prepend(*cmp0);
>              }
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 0480dd8..6863aab 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -551,13 +551,13 @@ namespace gbe
>  
>      static INLINE GenRegister immuw(uint16_t uw) {
>        GenRegister immediate = imm(GEN_TYPE_UW);
> -      immediate.value.ud = uw | (uw << 16);
> +      immediate.value.ud = uw;
>        return immediate;
>      }
>  
>      static INLINE GenRegister immw(int16_t w) {
>        GenRegister immediate = imm(GEN_TYPE_W);
> -      immediate.value.d = w | (w << 16);
> +      immediate.value.d = w;
>        return immediate;
>      }
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list