[Beignet] [PATCH] GBE: Implement instruction compact.

Ruiling Song ruiling.song at intel.com
Tue Apr 15 01:53:17 PDT 2014


A native GEN ASM instruction takes 2*64 bits, but GEN also supports compact
instructions, which take only 64 bits. To make the code easier to understand,
GenInstruction now stands only for 64 bits of memory, and GenNativeInstruction
& GenCompactInstruction represent normal (native) and compact instructions.

After this change, it is not easy to map a SelectionInstruction distance to an
ASM distance, as the instructions within that distance may be compacted. To
avoid introducing too much complexity, JMP, IF, ENDIF and NOP will NEVER be compacted.

Some experiments with LuxMark show that it can reduce instruction memory by
about 20%. Unfortunately, no performance improvement was observed.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/CMakeLists.txt                 |    1 +
 backend/src/backend/gen/gen_mesa_disasm.c  |   20 +-
 backend/src/backend/gen_context.cpp        |   41 +-
 backend/src/backend/gen_defs.hpp           |  953 +++++++++++++++-------------
 backend/src/backend/gen_encoder.cpp        |  116 ++--
 backend/src/backend/gen_encoder.hpp        |   11 +-
 backend/src/backend/gen_insn_compact.cpp   |  521 +++++++++++++++
 backend/src/backend/gen_insn_selection.cpp |    2 +-
 backend/src/backend/gen_reg_allocation.cpp |    4 +-
 backend/src/backend/gen_register.hpp       |    4 +-
 10 files changed, 1133 insertions(+), 540 deletions(-)
 create mode 100644 backend/src/backend/gen_insn_compact.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index d6f2d3c..9c96f33 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -162,6 +162,7 @@ else (GBE_USE_BLOB)
     backend/gen_program.hpp
     backend/gen_program.h
     backend/gen_defs.hpp
+    backend/gen_insn_compact.cpp
     backend/gen_encoder.hpp
     backend/gen_encoder.cpp)
 
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index e58ef31..871277b 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -533,7 +533,7 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
   return err;
 }
 
-static int dest (FILE *file, const struct GenInstruction *inst)
+static int dest (FILE *file, const union GenNativeInstruction *inst)
 {
   int	err = 0;
 
@@ -587,7 +587,7 @@ static int dest (FILE *file, const struct GenInstruction *inst)
   return 0;
 }
 
-static int dest_3src (FILE *file, const struct GenInstruction *inst)
+static int dest_3src (FILE *file, const union GenNativeInstruction *inst)
 {
   int	err = 0;
   const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE;
@@ -720,7 +720,7 @@ static int src_da16 (FILE *file,
   return err;
 }
 
-static int src0_3src (FILE *file, const struct GenInstruction *inst)
+static int src0_3src (FILE *file, const union GenNativeInstruction *inst)
 {
   int err = 0;
   uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
@@ -768,7 +768,7 @@ static int src0_3src (FILE *file, const struct GenInstruction *inst)
   return err;
 }
 
-static int src1_3src (FILE *file, const struct GenInstruction *inst)
+static int src1_3src (FILE *file, const union GenNativeInstruction *inst)
 {
   int err = 0;
   uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
@@ -821,7 +821,7 @@ static int src1_3src (FILE *file, const struct GenInstruction *inst)
 }
 
 
-static int src2_3src (FILE *file, const struct GenInstruction *inst)
+static int src2_3src (FILE *file, const union GenNativeInstruction *inst)
 {
   int err = 0;
   uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
@@ -871,7 +871,7 @@ static int src2_3src (FILE *file, const struct GenInstruction *inst)
   return err;
 }
 
-static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
+static int imm (FILE *file, uint32_t type, const union GenNativeInstruction *inst) {
   switch (type) {
     case GEN_TYPE_UD:
       format (file, "0x%xUD", inst->bits3.ud);
@@ -900,7 +900,7 @@ static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
   return 0;
 }
 
-static int src0 (FILE *file, const struct GenInstruction *inst)
+static int src0 (FILE *file, const union GenNativeInstruction *inst)
 {
   if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE)
     return imm (file, inst->bits1.da1.src0_reg_type,
@@ -960,7 +960,7 @@ static int src0 (FILE *file, const struct GenInstruction *inst)
   }
 }
 
-static int src1 (FILE *file, const struct GenInstruction *inst)
+static int src1 (FILE *file, const union GenNativeInstruction *inst)
 {
   if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE)
     return imm (file, inst->bits1.da1.src1_reg_type,
@@ -1029,7 +1029,7 @@ static const int esize[6] = {
   [5] = 32,
 };
 
-static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
+static int qtr_ctrl(FILE *file, const union GenNativeInstruction *inst)
 {
   int qtr_ctl = inst->header.quarter_control;
   int exec_size = esize[inst->header.execution_size];
@@ -1060,7 +1060,7 @@ static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
 
 int gen_disasm (FILE *file, const void *opaque_insn)
 {
-  const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn;
+  const union GenNativeInstruction *inst = (const union GenNativeInstruction *) opaque_insn;
   int	err = 0;
   int space = 0;
   int gen = 7;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 50f10c5..f8292d6 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -39,6 +39,7 @@
 
 namespace gbe
 {
+  extern void decompactInstruction(union GenCompactInstruction *p, union GenNativeInstruction *pOut);
   ///////////////////////////////////////////////////////////////////////////
   // GenContext implementation
   ///////////////////////////////////////////////////////////////////////////
@@ -88,16 +89,18 @@ namespace gbe
       const LabelIndex label = pair.first;
       const int32_t insnID = pair.second;
       const int32_t targetID = labelPos.find(label)->second;
-      p->patchJMPI(insnID, (targetID - insnID) * 2);
+      p->patchJMPI(insnID, (targetID - insnID));
     }
     for (auto pair : branchPos3) {
       const LabelPair labelPair = pair.first;
       const int32_t insnID = pair.second;
-      const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0;
-      const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1;
-      assert((jip - insnID) * 2 < 32767 && (jip - insnID) * 2 > -32768);
-      assert((uip - insnID) * 2 < 32767 && (uip - insnID) * 2 > -32768);
-      p->patchJMPI(insnID, (((uip - insnID) * 2) << 16) | ((jip - insnID) * 2));
+      // FIXME the 'labelPair' implementation must be fixed, as it is hard to convert an
+      // InstructionSelection offset to an ASM offset since the ASM may be compacted
+      const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0*2;
+      const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1*2;
+      assert((jip - insnID) < 32767 && (jip - insnID) > -32768);
+      assert((uip - insnID) < 32767 && (uip - insnID) > -32768);
+      p->patchJMPI(insnID, (((uip - insnID)) << 16) | ((jip - insnID)));
     }
   }
 
@@ -975,7 +978,7 @@ namespace gbe
       p->SHL(high, low, tmp);
       p->MOV(low, GenRegister::immud(0));
 
-      p->patchJMPI(jip1, (p->n_instruction() - jip1) * 2);
+      p->patchJMPI(jip1, (p->n_instruction() - jip1) );
       p->curr.predicate = GEN_PREDICATE_NONE;
       p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(31));  //update dst where high != 0
       p->curr.predicate = GEN_PREDICATE_NORMAL;
@@ -989,7 +992,7 @@ namespace gbe
       p->CMP(GEN_CONDITIONAL_EQ, high, GenRegister::immud(0x80000000));
       p->CMP(GEN_CONDITIONAL_EQ, low, GenRegister::immud(0x0));
       p->AND(dst_ud, dst_ud, GenRegister::immud(0xfffffffe));
-      p->patchJMPI(jip0, (p->n_instruction() - jip0) * 2);
+      p->patchJMPI(jip0, (p->n_instruction() - jip0));
 
     p->pop();
 
@@ -1426,6 +1429,7 @@ namespace gbe
     GenRegister zero = GenRegister::immud(0),
                 one = GenRegister::immud(1),
                 imm31 = GenRegister::immud(31);
+    uint32_t jip0;
     // (a,b) <- x
     loadTopHalf(a, x);
     loadBottomHalf(b, x);
@@ -1516,10 +1520,11 @@ namespace gbe
         p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
       else
         NOT_IMPLEMENTED;
-      int jip = -(int)(p->n_instruction() - loop_start + 1) * 2;
+      int distance = -(int)(p->n_instruction() - loop_start );
       p->curr.noMask = 1;
+      jip0 = p->n_instruction();
       p->JMPI(zero);
-      p->patchJMPI(p->n_instruction() - 1, jip + 2);
+      p->patchJMPI(jip0, distance);
       p->pop();
       // end of loop
     }
@@ -2001,14 +2006,24 @@ namespace gbe
     if (OCL_OUTPUT_ASM) {
       std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
       ir::LabelIndex curLabel = (ir::LabelIndex)0;
+      GenCompactInstruction * pCom = NULL;
+      GenNativeInstruction insn;
       std::cout << "  L0:" << std::endl;
-      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID) {
+      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
         if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
           std::cout << "  L" << curLabel + 1 << ":" << std::endl;
           curLabel = (ir::LabelIndex)(curLabel + 1);
         }
-        std::cout << "    (" << std::setw(8) << insnID * 2 << ")  ";
-        gen_disasm(stdout, &p->store[insnID]);
+        std::cout << "    (" << std::setw(8) << insnID << ")  ";
+        pCom = (GenCompactInstruction*)&p->store[insnID];
+        if(pCom->bits1.cmpt_control == 1) {
+          decompactInstruction(pCom, &insn);
+          gen_disasm(stdout, &insn);
+          insnID++;
+        } else {
+          gen_disasm(stdout, &p->store[insnID]);
+          insnID = insnID + 2;
+        }
       }
       std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
     }
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index e731174..4ad1cd1 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -436,475 +436,512 @@ enum GenMessageTarget {
 #define GEN_MAX_GRF 128
 
 /* Instruction format for the execution units */
-struct GenInstruction
-{
-  struct {
-    uint32_t opcode:7;
-    uint32_t pad:1;
-    uint32_t access_mode:1;
-    uint32_t mask_control:1;
-    uint32_t dependency_control:2;
-    uint32_t quarter_control:2;
-    uint32_t thread_control:2;
-    uint32_t predicate_control:4;
-    uint32_t predicate_inverse:1;
-    uint32_t execution_size:3;
-    uint32_t destreg_or_condmod:4;
-    uint32_t acc_wr_control:1;
-    uint32_t cmpt_control:1;
-    uint32_t debug_control:1;
-    uint32_t saturate:1;
-  } header;
-
-  union {
-    struct {
-      uint32_t dest_reg_file:2;
-      uint32_t dest_reg_type:3;
-      uint32_t src0_reg_file:2;
-      uint32_t src0_reg_type:3;
-      uint32_t src1_reg_file:2;
-      uint32_t src1_reg_type:3;
-      uint32_t nib_ctrl:1;
-      uint32_t dest_subreg_nr:5;
-      uint32_t dest_reg_nr:8;
-      uint32_t dest_horiz_stride:2;
-      uint32_t dest_address_mode:1;
-    } da1;
-
-    struct {
-      uint32_t dest_reg_file:2;
-      uint32_t dest_reg_type:3;
-      uint32_t src0_reg_file:2;
-      uint32_t src0_reg_type:3;
-      uint32_t src1_reg_file:2;        /* 0x00000c00 */
-      uint32_t src1_reg_type:3;        /* 0x00007000 */
-      uint32_t nib_ctrl:1;
-      int dest_indirect_offset:10;        /* offset against the deref'd address reg */
-      uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
-      uint32_t dest_horiz_stride:2;
-      uint32_t dest_address_mode:1;
-    } ia1;
-
-    struct {
-      uint32_t dest_reg_file:2;
-      uint32_t dest_reg_type:3;
-      uint32_t src0_reg_file:2;
-      uint32_t src0_reg_type:3;
-      uint32_t src1_reg_file:2;
-      uint32_t src1_reg_type:3;
-      uint32_t nib_ctrl:1;
-      uint32_t dest_writemask:4;
-      uint32_t dest_subreg_nr:1;
-      uint32_t dest_reg_nr:8;
-      uint32_t dest_horiz_stride:2;
-      uint32_t dest_address_mode:1;
-    } da16;
 
-    struct {
-      uint32_t dest_reg_file:2;
-      uint32_t dest_reg_type:3;
-      uint32_t src0_reg_file:2;
-      uint32_t src0_reg_type:3;
-      uint32_t nib_ctrl:1;
-      uint32_t dest_writemask:4;
-      int dest_indirect_offset:6;
-      uint32_t dest_subreg_nr:3;
-      uint32_t dest_horiz_stride:2;
-      uint32_t dest_address_mode:1;
-    } ia16;
+struct GenInstruction {
+  uint32_t low;
+  uint32_t high;
+};
 
+union GenCompactInstruction {
+  struct GenInstruction low;
+  struct {
     struct {
-      uint32_t dest_reg_file:2;
-      uint32_t dest_reg_type:3;
-      uint32_t src0_reg_file:2;
-      uint32_t src0_reg_type:3;
-      uint32_t src1_reg_file:2;
-      uint32_t src1_reg_type:3;
+      uint32_t opcode:7;
+      uint32_t debug_control:1;
+      uint32_t control_index:5;
+      uint32_t data_type_index:5;
+      uint32_t sub_reg_index:5;
+      uint32_t acc_wr_control:1;
+      uint32_t destreg_or_condmod:4;
       uint32_t pad:1;
-      int jump_count:16;
-    } branch_gen6;
-
+      uint32_t cmpt_control:1;
+      uint32_t src0_index_lo:2;
+    } bits1;
     struct {
-      uint32_t dest_reg_file:1;
-      uint32_t flag_subreg_num:1;
-      uint32_t pad0:2;
-      uint32_t src0_abs:1;
-      uint32_t src0_negate:1;
-      uint32_t src1_abs:1;
-      uint32_t src1_negate:1;
-      uint32_t src2_abs:1;
-      uint32_t src2_negate:1;
-      uint32_t pad1:7;
-      uint32_t dest_writemask:4;
-      uint32_t dest_subreg_nr:3;
+      uint32_t src0_index_hi:3;
+      uint32_t src1_index:5;
       uint32_t dest_reg_nr:8;
-    } da3src;
-  } bits1;
-
-  union {
-    struct {
-      uint32_t src0_subreg_nr:5;
       uint32_t src0_reg_nr:8;
-      uint32_t src0_abs:1;
-      uint32_t src0_negate:1;
-      uint32_t src0_address_mode:1;
-      uint32_t src0_horiz_stride:2;
-      uint32_t src0_width:3;
-      uint32_t src0_vert_stride:4;
-      uint32_t flag_sub_reg_nr:1;
-      uint32_t flag_reg_nr:1;
-      uint32_t pad:5;
-    } da1;
-
-    struct {
-      int src0_indirect_offset:10;
-      uint32_t src0_subreg_nr:3;
-      uint32_t src0_abs:1;
-      uint32_t src0_negate:1;
-      uint32_t src0_address_mode:1;
-      uint32_t src0_horiz_stride:2;
-      uint32_t src0_width:3;
-      uint32_t src0_vert_stride:4;
-      uint32_t flag_sub_reg_nr:1;
-      uint32_t flag_reg_nr:1;
-      uint32_t pad:5;
-    } ia1;
-
-    struct {
-      uint32_t src0_swz_x:2;
-      uint32_t src0_swz_y:2;
-      uint32_t src0_subreg_nr:1;
-      uint32_t src0_reg_nr:8;
-      uint32_t src0_abs:1;
-      uint32_t src0_negate:1;
-      uint32_t src0_address_mode:1;
-      uint32_t src0_swz_z:2;
-      uint32_t src0_swz_w:2;
-      uint32_t pad0:1;
-      uint32_t src0_vert_stride:4;
-      uint32_t flag_sub_reg_nr:1;
-      uint32_t flag_reg_nr:1;
-      uint32_t pad:5;
-    } da16;
-
-    struct {
-      uint32_t src0_swz_x:2;
-      uint32_t src0_swz_y:2;
-      int src0_indirect_offset:6;
-      uint32_t src0_subreg_nr:3;
-      uint32_t src0_abs:1;
-      uint32_t src0_negate:1;
-      uint32_t src0_address_mode:1;
-      uint32_t src0_swz_z:2;
-      uint32_t src0_swz_w:2;
-      uint32_t pad0:1;
-      uint32_t src0_vert_stride:4;
-      uint32_t flag_sub_reg_nr:1;
-      uint32_t flag_reg_nr:1;
-      uint32_t pad:5;
-    } ia16;
-
-    struct {
-      uint32_t src0_rep_ctrl:1;
-      uint32_t src0_swizzle:8;
-      uint32_t src0_subreg_nr:3;
-      uint32_t src0_reg_nr:8;
-      uint32_t pad0:1;
-      uint32_t src1_rep_ctrl:1;
-      uint32_t src1_swizzle:8;
-      uint32_t src1_subreg_nr_low:2;
-    } da3src;
-  } bits2;
-
-  union {
-    struct {
-      uint32_t src1_subreg_nr:5;
       uint32_t src1_reg_nr:8;
-      uint32_t src1_abs:1;
-      uint32_t src1_negate:1;
-      uint32_t src1_address_mode:1;
-      uint32_t src1_horiz_stride:2;
-      uint32_t src1_width:3;
-      uint32_t src1_vert_stride:4;
-      uint32_t pad0:7;
-    } da1;
-
-    struct {
-      uint32_t src1_swz_x:2;
-      uint32_t src1_swz_y:2;
-      uint32_t src1_subreg_nr:1;
-      uint32_t src1_reg_nr:8;
-      uint32_t src1_abs:1;
-      uint32_t src1_negate:1;
-      uint32_t src1_address_mode:1;
-      uint32_t src1_swz_z:2;
-      uint32_t src1_swz_w:2;
-      uint32_t pad1:1;
-      uint32_t src1_vert_stride:4;
-      uint32_t pad2:7;
-    } da16;
-
-    struct {
-      int  src1_indirect_offset:10;
-      uint32_t src1_subreg_nr:3;
-      uint32_t src1_abs:1;
-      uint32_t src1_negate:1;
-      uint32_t src1_address_mode:1;
-      uint32_t src1_horiz_stride:2;
-      uint32_t src1_width:3;
-      uint32_t src1_vert_stride:4;
-      uint32_t pad1:7;
-    } ia1;
-
-    struct {
-      uint32_t src1_swz_x:2;
-      uint32_t src1_swz_y:2;
-      int  src1_indirect_offset:6;
-      uint32_t src1_subreg_nr:3;
-      uint32_t src1_abs:1;
-      uint32_t src1_negate:1;
-      uint32_t pad0:1;
-      uint32_t src1_swz_z:2;
-      uint32_t src1_swz_w:2;
-      uint32_t pad1:1;
-      uint32_t src1_vert_stride:4;
-      uint32_t pad2:7;
-    } ia16;
-
-    struct {
-      uint32_t function_control:19;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad1:2;
-      uint32_t end_of_thread:1;
-    } generic_gen5;
-
-    struct {
-      uint32_t sub_function_id:3;
-      uint32_t pad0:11;
-      uint32_t ack_req:1;
-      uint32_t notify:2;
-      uint32_t pad1:2;
-      uint32_t header:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } msg_gateway;
-
-    struct {
-      uint32_t opcode:1;
-      uint32_t request:1;
-      uint32_t pad0:2;
-      uint32_t resource:1;
-      uint32_t pad1:14;
-      uint32_t header:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } spawner_gen5;
-
-    /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
-    struct {
-      uint32_t function:4;
-      uint32_t int_type:1;
-      uint32_t precision:1;
-      uint32_t saturate:1;
-      uint32_t data_type:1;
-      uint32_t snapshot:1;
-      uint32_t pad0:10;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad1:2;
-      uint32_t end_of_thread:1;
-    } math_gen5;
+    } bits2;
+  };
+};
 
+union GenNativeInstruction
+{
+  struct {
+    struct GenInstruction low;
+    struct GenInstruction high;
+  };
+  struct {
     struct {
-      uint32_t bti:8;
-      uint32_t sampler:4;
-      uint32_t msg_type:5;
-      uint32_t simd_mode:2;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad1:2;
-      uint32_t end_of_thread:1;
-    } sampler_gen7;
-
-    /**
-     * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
-     *
-     * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
-     **/
-    struct {
-      uint32_t bti:8;
-      uint32_t msg_control:5;
-      uint32_t msg_type:3;
-      uint32_t pad0:3;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad1:2;
-      uint32_t end_of_thread:1;
-    } gen6_dp_sampler_const_cache;
-
-    /*! Data port untyped read / write messages */
-    struct {
-      uint32_t bti:8;
-      uint32_t rgba:4;
-      uint32_t simd_mode:2;
-      uint32_t msg_type:4;
-      uint32_t category:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } gen7_untyped_rw;
-
-    /*! Data port byte scatter / gather */
-    struct {
-      uint32_t bti:8;
-      uint32_t simd_mode:1;
-      uint32_t ignored0:1;
-      uint32_t data_size:2;
-      uint32_t ignored1:2;
-      uint32_t msg_type:4;
-      uint32_t category:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } gen7_byte_rw;
-
-    /*! Data port Scratch Read/ write */
-    struct {
-      uint32_t offset:12;
-      uint32_t block_size:2;
-      uint32_t ignored0:1;
-      uint32_t invalidate_after_read:1;
-      uint32_t channel_mode:1;
-      uint32_t msg_type:1;
-      uint32_t category:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } gen7_scratch_rw;
-
-    /*! Data port OBlock read / write */
-    struct {
-      uint32_t bti:8;
-      uint32_t block_size:3;
-      uint32_t ignored:2;
-      uint32_t invalidate_after_read:1;
-      uint32_t msg_type:4;
-      uint32_t category:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } gen7_oblock_rw;
-
-    /*! Data port dword scatter / gather */
-    struct {
-      uint32_t bti:8;
-      uint32_t block_size:2;
-      uint32_t ignored0:3;
-      uint32_t invalidate_after_read:1;
-      uint32_t msg_type:4;
-      uint32_t ignored1:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad2:2;
-      uint32_t end_of_thread:1;
-    } gen7_dword_rw;
-
-    /*! Data port typed read / write messages */
-    struct {
-      uint32_t bti:8;
-      uint32_t chan_mask:4;
+      uint32_t opcode:7;
       uint32_t pad:1;
-      uint32_t slot:1;
-      uint32_t msg_type:4;
-      uint32_t pad2:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad3:2;
-      uint32_t end_of_thread:1;
-    } gen7_typed_rw;
-
-    /*! Memory fence */
-    struct {
-      uint32_t bti:8;
-      uint32_t pad:5;
-      uint32_t commit_enable:1;
-      uint32_t msg_type:4;
-      uint32_t pad2:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad3:2;
-      uint32_t end_of_thread:1;
-    } gen7_memory_fence;
-
-    /*! atomic messages */
-    struct {
-      uint32_t bti:8;
-      uint32_t aop_type:4;
-      uint32_t simd_mode:1;
-      uint32_t return_data:1;
-      uint32_t msg_type:4;
-      uint32_t category:1;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad3:2;
-      uint32_t end_of_thread:1;
-    } gen7_atomic_op;
-
-    struct {
-      uint32_t src1_subreg_nr_high:1;
-      uint32_t src1_reg_nr:8;
-      uint32_t pad0:1;
-      uint32_t src2_rep_ctrl:1;
-      uint32_t src2_swizzle:8;
-      uint32_t src2_subreg_nr:3;
-      uint32_t src2_reg_nr:8;
-      uint32_t pad1:2;
-    } da3src;
-
-    /*! Message gateway */
-    struct {
-      uint32_t subfunc:3;
-      uint32_t pad:11;
-      uint32_t ackreq:1;
-      uint32_t notify:2;
-      uint32_t pad2:2;
-      uint32_t header_present:1;
-      uint32_t response_length:5;
-      uint32_t msg_length:4;
-      uint32_t pad3:2;
-      uint32_t end_of_thread:1;
-    } gen7_msg_gw;
-
-    struct {
-      uint32_t jip:16;
-      uint32_t uip:16;
-    } gen7_branch;
-
-    int d;
-    uint32_t ud;
-    float f;
-  } bits3;
+      uint32_t access_mode:1;
+      uint32_t mask_control:1;
+      uint32_t dependency_control:2;
+      uint32_t quarter_control:2;
+      uint32_t thread_control:2;
+      uint32_t predicate_control:4;
+      uint32_t predicate_inverse:1;
+      uint32_t execution_size:3;
+      uint32_t destreg_or_condmod:4;
+      uint32_t acc_wr_control:1;
+      uint32_t cmpt_control:1;
+      uint32_t debug_control:1;
+      uint32_t saturate:1;
+    } header;
+
+    union {
+      struct {
+        uint32_t dest_reg_file:2;
+        uint32_t dest_reg_type:3;
+        uint32_t src0_reg_file:2;
+        uint32_t src0_reg_type:3;
+        uint32_t src1_reg_file:2;
+        uint32_t src1_reg_type:3;
+        uint32_t nib_ctrl:1;
+        uint32_t dest_subreg_nr:5;
+        uint32_t dest_reg_nr:8;
+        uint32_t dest_horiz_stride:2;
+        uint32_t dest_address_mode:1;
+      } da1;
+
+      struct {
+        uint32_t dest_reg_file:2;
+        uint32_t dest_reg_type:3;
+        uint32_t src0_reg_file:2;
+        uint32_t src0_reg_type:3;
+        uint32_t src1_reg_file:2;        /* 0x00000c00 */
+        uint32_t src1_reg_type:3;        /* 0x00007000 */
+        uint32_t nib_ctrl:1;
+        int dest_indirect_offset:10;        /* offset against the deref'd address reg */
+        uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
+        uint32_t dest_horiz_stride:2;
+        uint32_t dest_address_mode:1;
+      } ia1;
+
+      struct {
+        uint32_t dest_reg_file:2;
+        uint32_t dest_reg_type:3;
+        uint32_t src0_reg_file:2;
+        uint32_t src0_reg_type:3;
+        uint32_t src1_reg_file:2;
+        uint32_t src1_reg_type:3;
+        uint32_t nib_ctrl:1;
+        uint32_t dest_writemask:4;
+        uint32_t dest_subreg_nr:1;
+        uint32_t dest_reg_nr:8;
+        uint32_t dest_horiz_stride:2;
+        uint32_t dest_address_mode:1;
+      } da16;
+
+      struct {
+        uint32_t dest_reg_file:2;
+        uint32_t dest_reg_type:3;
+        uint32_t src0_reg_file:2;
+        uint32_t src0_reg_type:3;
+        uint32_t nib_ctrl:1;
+        uint32_t dest_writemask:4;
+        int dest_indirect_offset:6;
+        uint32_t dest_subreg_nr:3;
+        uint32_t dest_horiz_stride:2;
+        uint32_t dest_address_mode:1;
+      } ia16;
+
+      struct {
+        uint32_t dest_reg_file:2;
+        uint32_t dest_reg_type:3;
+        uint32_t src0_reg_file:2;
+        uint32_t src0_reg_type:3;
+        uint32_t src1_reg_file:2;
+        uint32_t src1_reg_type:3;
+        uint32_t pad:1;
+        int jump_count:16;
+      } branch_gen6;
+
+      struct {
+        uint32_t dest_reg_file:1;
+        uint32_t flag_subreg_num:1;
+        uint32_t pad0:2;
+        uint32_t src0_abs:1;
+        uint32_t src0_negate:1;
+        uint32_t src1_abs:1;
+        uint32_t src1_negate:1;
+        uint32_t src2_abs:1;
+        uint32_t src2_negate:1;
+        uint32_t pad1:7;
+        uint32_t dest_writemask:4;
+        uint32_t dest_subreg_nr:3;
+        uint32_t dest_reg_nr:8;
+      } da3src;
+    } bits1;
+
+    union {
+      struct {
+        uint32_t src0_subreg_nr:5;
+        uint32_t src0_reg_nr:8;
+        uint32_t src0_abs:1;
+        uint32_t src0_negate:1;
+        uint32_t src0_address_mode:1;
+        uint32_t src0_horiz_stride:2;
+        uint32_t src0_width:3;
+        uint32_t src0_vert_stride:4;
+        uint32_t flag_sub_reg_nr:1;
+        uint32_t flag_reg_nr:1;
+        uint32_t pad:5;
+      } da1;
+
+      struct {
+        int src0_indirect_offset:10;
+        uint32_t src0_subreg_nr:3;
+        uint32_t src0_abs:1;
+        uint32_t src0_negate:1;
+        uint32_t src0_address_mode:1;
+        uint32_t src0_horiz_stride:2;
+        uint32_t src0_width:3;
+        uint32_t src0_vert_stride:4;
+        uint32_t flag_sub_reg_nr:1;
+        uint32_t flag_reg_nr:1;
+        uint32_t pad:5;
+      } ia1;
+
+      struct {
+        uint32_t src0_swz_x:2;
+        uint32_t src0_swz_y:2;
+        uint32_t src0_subreg_nr:1;
+        uint32_t src0_reg_nr:8;
+        uint32_t src0_abs:1;
+        uint32_t src0_negate:1;
+        uint32_t src0_address_mode:1;
+        uint32_t src0_swz_z:2;
+        uint32_t src0_swz_w:2;
+        uint32_t pad0:1;
+        uint32_t src0_vert_stride:4;
+        uint32_t flag_sub_reg_nr:1;
+        uint32_t flag_reg_nr:1;
+        uint32_t pad:5;
+      } da16;
+
+      struct {
+        uint32_t src0_swz_x:2;
+        uint32_t src0_swz_y:2;
+        int src0_indirect_offset:6;
+        uint32_t src0_subreg_nr:3;
+        uint32_t src0_abs:1;
+        uint32_t src0_negate:1;
+        uint32_t src0_address_mode:1;
+        uint32_t src0_swz_z:2;
+        uint32_t src0_swz_w:2;
+        uint32_t pad0:1;
+        uint32_t src0_vert_stride:4;
+        uint32_t flag_sub_reg_nr:1;
+        uint32_t flag_reg_nr:1;
+        uint32_t pad:5;
+      } ia16;
+
+      struct {
+        uint32_t src0_rep_ctrl:1;
+        uint32_t src0_swizzle:8;
+        uint32_t src0_subreg_nr:3;
+        uint32_t src0_reg_nr:8;
+        uint32_t pad0:1;
+        uint32_t src1_rep_ctrl:1;
+        uint32_t src1_swizzle:8;
+        uint32_t src1_subreg_nr_low:2;
+      } da3src;
+    } bits2;
+
+    union {
+      struct {
+        uint32_t src1_subreg_nr:5;
+        uint32_t src1_reg_nr:8;
+        uint32_t src1_abs:1;
+        uint32_t src1_negate:1;
+        uint32_t src1_address_mode:1;
+        uint32_t src1_horiz_stride:2;
+        uint32_t src1_width:3;
+        uint32_t src1_vert_stride:4;
+        uint32_t pad0:7;
+      } da1;
+
+      struct {
+        uint32_t src1_swz_x:2;
+        uint32_t src1_swz_y:2;
+        uint32_t src1_subreg_nr:1;
+        uint32_t src1_reg_nr:8;
+        uint32_t src1_abs:1;
+        uint32_t src1_negate:1;
+        uint32_t src1_address_mode:1;
+        uint32_t src1_swz_z:2;
+        uint32_t src1_swz_w:2;
+        uint32_t pad1:1;
+        uint32_t src1_vert_stride:4;
+        uint32_t pad2:7;
+      } da16;
+
+      struct {
+        int  src1_indirect_offset:10;
+        uint32_t src1_subreg_nr:3;
+        uint32_t src1_abs:1;
+        uint32_t src1_negate:1;
+        uint32_t src1_address_mode:1;
+        uint32_t src1_horiz_stride:2;
+        uint32_t src1_width:3;
+        uint32_t src1_vert_stride:4;
+        uint32_t pad1:7;
+      } ia1;
+
+      struct {
+        uint32_t src1_swz_x:2;
+        uint32_t src1_swz_y:2;
+        int  src1_indirect_offset:6;
+        uint32_t src1_subreg_nr:3;
+        uint32_t src1_abs:1;
+        uint32_t src1_negate:1;
+        uint32_t pad0:1;
+        uint32_t src1_swz_z:2;
+        uint32_t src1_swz_w:2;
+        uint32_t pad1:1;
+        uint32_t src1_vert_stride:4;
+        uint32_t pad2:7;
+      } ia16;
+
+      struct {
+        uint32_t function_control:19;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad1:2;
+        uint32_t end_of_thread:1;
+      } generic_gen5;
+
+      struct {
+        uint32_t sub_function_id:3;
+        uint32_t pad0:11;
+        uint32_t ack_req:1;
+        uint32_t notify:2;
+        uint32_t pad1:2;
+        uint32_t header:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } msg_gateway;
+
+      struct {
+        uint32_t opcode:1;
+        uint32_t request:1;
+        uint32_t pad0:2;
+        uint32_t resource:1;
+        uint32_t pad1:14;
+        uint32_t header:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } spawner_gen5;
+
+      /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+      struct {
+        uint32_t function:4;
+        uint32_t int_type:1;
+        uint32_t precision:1;
+        uint32_t saturate:1;
+        uint32_t data_type:1;
+        uint32_t snapshot:1;
+        uint32_t pad0:10;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad1:2;
+        uint32_t end_of_thread:1;
+      } math_gen5;
+
+      struct {
+        uint32_t bti:8;
+        uint32_t sampler:4;
+        uint32_t msg_type:5;
+        uint32_t simd_mode:2;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad1:2;
+        uint32_t end_of_thread:1;
+      } sampler_gen7;
+
+      /**
+       * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+       *
+       * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+       **/
+      struct {
+        uint32_t bti:8;
+        uint32_t msg_control:5;
+        uint32_t msg_type:3;
+        uint32_t pad0:3;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad1:2;
+        uint32_t end_of_thread:1;
+      } gen6_dp_sampler_const_cache;
+
+      /*! Data port untyped read / write messages */
+      struct {
+        uint32_t bti:8;
+        uint32_t rgba:4;
+        uint32_t simd_mode:2;
+        uint32_t msg_type:4;
+        uint32_t category:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } gen7_untyped_rw;
+
+      /*! Data port byte scatter / gather */
+      struct {
+        uint32_t bti:8;
+        uint32_t simd_mode:1;
+        uint32_t ignored0:1;
+        uint32_t data_size:2;
+        uint32_t ignored1:2;
+        uint32_t msg_type:4;
+        uint32_t category:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } gen7_byte_rw;
+
+      /*! Data port Scratch Read/ write */
+      struct {
+        uint32_t offset:12;
+        uint32_t block_size:2;
+        uint32_t ignored0:1;
+        uint32_t invalidate_after_read:1;
+        uint32_t channel_mode:1;
+        uint32_t msg_type:1;
+        uint32_t category:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } gen7_scratch_rw;
+
+      /*! Data port OBlock read / write */
+      struct {
+        uint32_t bti:8;
+        uint32_t block_size:3;
+        uint32_t ignored:2;
+        uint32_t invalidate_after_read:1;
+        uint32_t msg_type:4;
+        uint32_t category:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } gen7_oblock_rw;
+
+      /*! Data port dword scatter / gather */
+      struct {
+        uint32_t bti:8;
+        uint32_t block_size:2;
+        uint32_t ignored0:3;
+        uint32_t invalidate_after_read:1;
+        uint32_t msg_type:4;
+        uint32_t ignored1:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad2:2;
+        uint32_t end_of_thread:1;
+      } gen7_dword_rw;
+
+      /*! Data port typed read / write messages */
+      struct {
+        uint32_t bti:8;
+        uint32_t chan_mask:4;
+        uint32_t pad:1;
+        uint32_t slot:1;
+        uint32_t msg_type:4;
+        uint32_t pad2:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad3:2;
+        uint32_t end_of_thread:1;
+      } gen7_typed_rw;
+
+      /*! Memory fence */
+      struct {
+        uint32_t bti:8;
+        uint32_t pad:5;
+        uint32_t commit_enable:1;
+        uint32_t msg_type:4;
+        uint32_t pad2:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad3:2;
+        uint32_t end_of_thread:1;
+      } gen7_memory_fence;
+
+      /*! atomic messages */
+      struct {
+        uint32_t bti:8;
+        uint32_t aop_type:4;
+        uint32_t simd_mode:1;
+        uint32_t return_data:1;
+        uint32_t msg_type:4;
+        uint32_t category:1;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad3:2;
+        uint32_t end_of_thread:1;
+      } gen7_atomic_op;
+
+      struct {
+        uint32_t src1_subreg_nr_high:1;
+        uint32_t src1_reg_nr:8;
+        uint32_t pad0:1;
+        uint32_t src2_rep_ctrl:1;
+        uint32_t src2_swizzle:8;
+        uint32_t src2_subreg_nr:3;
+        uint32_t src2_reg_nr:8;
+        uint32_t pad1:2;
+      } da3src;
+
+      /*! Message gateway */
+      struct {
+        uint32_t subfunc:3;
+        uint32_t pad:11;
+        uint32_t ackreq:1;
+        uint32_t notify:2;
+        uint32_t pad2:2;
+        uint32_t header_present:1;
+        uint32_t response_length:5;
+        uint32_t msg_length:4;
+        uint32_t pad3:2;
+        uint32_t end_of_thread:1;
+      } gen7_msg_gw;
+
+      struct {
+        uint32_t jip:16;
+        uint32_t uip:16;
+      } gen7_branch;
+
+      int d;
+      uint32_t ud;
+      float f;
+    } bits3;
+  };
 };
 
 #endif /* __GEN_DEFS_HPP__ */
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 9df031e..8b5057e 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -51,8 +51,11 @@
 #include "backend/gen_encoder.hpp"
 #include <cstring>
 
+
 namespace gbe
 {
+  extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split);
+  extern bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split);
   //////////////////////////////////////////////////////////////////////////
   // Some helper functions to encode
   //////////////////////////////////////////////////////////////////////////
@@ -91,7 +94,7 @@ namespace gbe
   }
 
   static void setMessageDescriptor(GenEncoder *p,
-                                   GenInstruction *inst,
+                                   GenNativeInstruction *inst,
                                    enum GenMessageTarget sfid,
                                    unsigned msg_length,
                                    unsigned response_length,
@@ -107,7 +110,7 @@ namespace gbe
   }
 
   static void setDPUntypedRW(GenEncoder *p,
-                             GenInstruction *insn,
+                             GenNativeInstruction *insn,
                              uint32_t bti,
                              uint32_t rgba,
                              uint32_t msg_type,
@@ -128,7 +131,7 @@ namespace gbe
   }
 
   static void setDPByteScatterGather(GenEncoder *p,
-                                     GenInstruction *insn,
+                                     GenNativeInstruction *insn,
                                      uint32_t bti,
                                      uint32_t elem_size,
                                      uint32_t msg_type,
@@ -149,7 +152,7 @@ namespace gbe
   }
 #if 0
   static void setOBlockRW(GenEncoder *p,
-                          GenInstruction *insn,
+                          GenNativeInstruction *insn,
                           uint32_t bti,
                           uint32_t size,
                           uint32_t msg_type,
@@ -167,7 +170,7 @@ namespace gbe
 #endif
 
   static void setSamplerMessage(GenEncoder *p,
-                                GenInstruction *insn,
+                                GenNativeInstruction *insn,
                                 unsigned char bti,
                                 unsigned char sampler,
                                 uint32_t msg_type,
@@ -187,7 +190,7 @@ namespace gbe
 
 
   static void setTypedWriteMessage(GenEncoder *p,
-                                   GenInstruction *insn,
+                                   GenNativeInstruction *insn,
                                    unsigned char bti,
                                    unsigned char msg_type,
                                    uint32_t msg_length,
@@ -199,7 +202,7 @@ namespace gbe
      insn->bits3.gen7_typed_rw.msg_type = msg_type;
   }
   static void setDWordScatterMessgae(GenEncoder *p,
-                                     GenInstruction *insn,
+                                     GenNativeInstruction *insn,
                                      uint32_t bti,
                                      uint32_t block_size,
                                      uint32_t msg_type,
@@ -238,7 +241,7 @@ namespace gbe
     curr = stack[--stateNum];
   }
 
-  void GenEncoder::setHeader(GenInstruction *insn) {
+  void GenEncoder::setHeader(GenNativeInstruction *insn) {
     if (this->curr.execWidth == 8)
       insn->header.execution_size = GEN_WIDTH_8;
     else if (this->curr.execWidth == 16)
@@ -260,7 +263,7 @@ namespace gbe
     insn->header.saturate = this->curr.saturate;
   }
 
-  void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) {
+  void GenEncoder::setDst(GenNativeInstruction *insn, GenRegister dest) {
      if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
         assert(dest.nr < 128);
 
@@ -274,7 +277,7 @@ namespace gbe
      insn->bits1.da1.dest_horiz_stride = dest.hstride;
   }
 
-  void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) {
+  void GenEncoder::setSrc0(GenNativeInstruction *insn, GenRegister reg) {
      if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
         assert(reg.nr < 128);
 
@@ -327,7 +330,7 @@ namespace gbe
     }
   }
 
-  void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
+  void GenEncoder::setSrc1(GenNativeInstruction *insn, GenRegister reg) {
      assert(reg.nr < 128);
      assert(reg.file != GEN_ARCHITECTURE_REGISTER_FILE || reg.nr == 0);
 
@@ -442,7 +445,7 @@ namespace gbe
   }
 
   void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
@@ -469,7 +472,7 @@ namespace gbe
   }
 
   void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
@@ -495,7 +498,7 @@ namespace gbe
   }
 
   void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
     if (this->curr.execWidth == 8) {
@@ -521,7 +524,7 @@ namespace gbe
   }
 
   void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
     this->setHeader(insn);
@@ -545,7 +548,7 @@ namespace gbe
   }
 
   void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
     uint32_t block_size = 0;
@@ -575,7 +578,7 @@ namespace gbe
   }
 
   void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     uint32_t msg_length = 0;
     uint32_t response_length = 0;
 
@@ -608,13 +611,21 @@ namespace gbe
       NOT_SUPPORTED;
 
   }
+  GenCompactInstruction *GenEncoder::nextCompact(uint32_t opcode) { // append a zeroed compact instruction carrying 'opcode'
+    GenCompactInstruction insn;
+    std::memset(&insn, 0, sizeof(GenCompactInstruction));
+    insn.bits1.opcode = opcode;
+    this->store.push_back(insn.low); // a compact instruction occupies exactly one store slot
+    return (GenCompactInstruction *)&this->store.back(); // pointer into 'store'; NOTE(review): presumably invalidated by later growth of 'store' -- confirm
+  }
 
-  GenInstruction *GenEncoder::next(uint32_t opcode) {
-     GenInstruction insn;
-     std::memset(&insn, 0, sizeof(GenInstruction));
+  GenNativeInstruction *GenEncoder::next(uint32_t opcode) {
+     GenNativeInstruction insn;
+     std::memset(&insn, 0, sizeof(GenNativeInstruction));
      insn.header.opcode = opcode;
-     this->store.push_back(insn);
-     return &this->store.back();
+     this->store.push_back(insn.low);
+     this->store.push_back(insn.high);
+     return (GenNativeInstruction *)(&this->store.back()-1);
   }
 
   INLINE void _handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
@@ -622,7 +633,7 @@ namespace gbe
        int w = p->curr.execWidth;
        p->push();
        p->curr.nibControl = 0;
-       GenInstruction *insn = p->next(opcode);
+       GenNativeInstruction *insn = p->next(opcode);
        p->setHeader(insn);
        p->setDst(insn, dst);
        p->setSrc0(insn, src0);
@@ -678,7 +689,9 @@ namespace gbe
        }
        p->pop();
      } else if (needToSplitAlu1(p, dst, src) == false) {
-       GenInstruction *insn = p->next(opcode);
+      if(compactAlu1(p, opcode, dst, src, condition, false))
+        return;
+       GenNativeInstruction *insn = p->next(opcode);
        if (condition != 0) {
          GBE_ASSERT(opcode == GEN_OPCODE_MOV ||
                     opcode == GEN_OPCODE_NOT);
@@ -688,7 +701,7 @@ namespace gbe
        p->setDst(insn, dst);
        p->setSrc0(insn, src);
      } else {
-       GenInstruction *insnQ1, *insnQ2;
+       GenNativeInstruction *insnQ1, *insnQ2;
 
        // Instruction for the first quarter
        insnQ1 = p->next(opcode);
@@ -718,7 +731,9 @@ namespace gbe
     if (dst.isdf() && src0.isdf() && src1.isdf()) {
        handleDouble(p, opcode, dst, src0, src1);
     } else if (needToSplitAlu2(p, dst, src0, src1) == false) {
-       GenInstruction *insn = p->next(opcode);
+       if(compactAlu2(p, opcode, dst, src0, src1, condition, false))
+         return;
+       GenNativeInstruction *insn = p->next(opcode);
        if (condition != 0) {
          GBE_ASSERT(opcode == GEN_OPCODE_OR ||
                     opcode == GEN_OPCODE_XOR ||
@@ -730,7 +745,7 @@ namespace gbe
        p->setSrc0(insn, src0);
        p->setSrc1(insn, src1);
     } else {
-       GenInstruction *insnQ1, *insnQ2;
+       GenNativeInstruction *insnQ1, *insnQ2;
 
        // Instruction for the first quarter
        insnQ1 = p->next(opcode);
@@ -754,14 +769,14 @@ namespace gbe
 
 #define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6))
 
-  static GenInstruction *alu3(GenEncoder *p,
+  static GenNativeInstruction *alu3(GenEncoder *p,
                               uint32_t opcode,
                               GenRegister dest,
                               GenRegister src0,
                               GenRegister src1,
                               GenRegister src2)
   {
-     GenInstruction *insn = p->next(opcode);
+     GenNativeInstruction *insn = p->next(opcode);
 
      assert(dest.file == GEN_GENERAL_REGISTER_FILE);
      assert(dest.nr < 128);
@@ -811,7 +826,7 @@ namespace gbe
 
      // Emit second half of the instruction
      if (p->curr.execWidth == 16) {
-      GenInstruction q1Insn = *insn;
+      GenNativeInstruction q1Insn = *insn;
       insn = p->next(opcode);
       *insn = q1Insn;
       insn->header.quarter_control = GEN_COMPRESSION_Q2;
@@ -1048,14 +1063,14 @@ namespace gbe
 
 
   void GenEncoder::NOP(void) {
-    GenInstruction *insn = this->next(GEN_OPCODE_NOP);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_NOP);
     this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
     this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
     this->setSrc1(insn, GenRegister::immud(0x0));
   }
 
   void GenEncoder::BARRIER(GenRegister src) {
-     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, GenRegister::null());
      this->setSrc0(insn, src);
@@ -1064,7 +1079,7 @@ namespace gbe
      insn->bits3.msg_gateway.notify = 0x1;
   }
   void GenEncoder::FENCE(GenRegister dst) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     this->setHeader(insn);
     this->setDst(insn, dst);
     this->setSrc0(insn, dst);
@@ -1090,7 +1105,7 @@ namespace gbe
   ALU2_BRA(BRC)
 
   void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
-    GenInstruction &insn = this->store[insnID];
+    GenNativeInstruction &insn = *(GenNativeInstruction *)&this->store[insnID];
     GBE_ASSERT(insnID < this->store.size());
     GBE_ASSERT(insn.header.opcode == GEN_OPCODE_JMPI ||
                insn.header.opcode == GEN_OPCODE_BRD  ||
@@ -1118,7 +1133,7 @@ namespace gbe
       // for all the branching instruction. And need to adjust the distance
       // for those branch instruction's start point and end point contains
       // this instruction.
-      GenInstruction &insn2 = this->store[insnID+1];
+      GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
       GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
       insn.header.opcode = GEN_OPCODE_ADD;
       this->setDst(&insn, GenRegister::ip());
@@ -1127,7 +1142,7 @@ namespace gbe
     } else {
       insn.header.predicate_inverse ^= 1;
       this->setSrc1(&insn, GenRegister::immd(2));
-      GenInstruction &insn2 = this->store[insnID+1];
+      GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
       GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
       GBE_ASSERT(insnID < this->store.size());
       insn2.header.predicate_control = GEN_PREDICATE_NONE;
@@ -1140,7 +1155,10 @@ namespace gbe
 
   void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
     if (needToSplitCmp(this, src0, src1) == false) {
-      GenInstruction *insn = this->next(GEN_OPCODE_CMP);
+      if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
+        return;
+      }
+      GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
       this->setHeader(insn);
       insn->header.destreg_or_condmod = conditional;
       insn->header.thread_control = GEN_THREAD_SWITCH;
@@ -1148,7 +1166,7 @@ namespace gbe
       this->setSrc0(insn, src0);
       this->setSrc1(insn, src1);
     } else {
-      GenInstruction *insnQ1, *insnQ2;
+      GenNativeInstruction *insnQ1, *insnQ2;
 
       // Instruction for the first quarter
       insnQ1 = this->next(GEN_OPCODE_CMP);
@@ -1177,7 +1195,7 @@ namespace gbe
                            GenRegister src0,
                            GenRegister src1)
   {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEL);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEL);
     GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
     this->setHeader(insn);
     insn->header.destreg_or_condmod = conditional;
@@ -1187,7 +1205,7 @@ namespace gbe
   }
 
   void GenEncoder::WAIT(void) {
-     GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
      GenRegister src = GenRegister::notification1();
      this->setDst(insn, GenRegister::null());
      this->setSrc0(insn, src);
@@ -1198,7 +1216,7 @@ namespace gbe
   }
 
   void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) {
-     GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
      assert(dst.file == GEN_GENERAL_REGISTER_FILE);
      assert(src0.file == GEN_GENERAL_REGISTER_FILE);
      assert(src1.file == GEN_GENERAL_REGISTER_FILE);
@@ -1226,7 +1244,7 @@ namespace gbe
         insn->header.quarter_control = GEN_COMPRESSION_Q1;
 
         if(this->curr.execWidth == 16) {
-          GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
+          GenNativeInstruction *insn2 = this->next(GEN_OPCODE_MATH);
           GenRegister new_dest, new_src0, new_src1;
           new_dest = GenRegister::QnPhysical(dst, 1);
           new_src0 = GenRegister::QnPhysical(src0, 1);
@@ -1244,7 +1262,7 @@ namespace gbe
   }
 
   void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
-     GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
      assert(dst.file == GEN_GENERAL_REGISTER_FILE);
      assert(src.file == GEN_GENERAL_REGISTER_FILE);
      assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
@@ -1275,7 +1293,7 @@ namespace gbe
        msg_length++;
      uint32_t simd_mode = (simdWidth == 16) ?
                             GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
-     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, dest);
      this->setSrc0(insn, msg);
@@ -1287,7 +1305,7 @@ namespace gbe
 
   void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
   {
-     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      uint32_t msg_type = GEN_TYPED_WRITE;
      uint32_t msg_length = header_present ? 9 : 8;
      this->setHeader(insn);
@@ -1296,7 +1314,7 @@ namespace gbe
      setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
   }
   static void setScratchMessage(GenEncoder *p,
-                                   GenInstruction *insn,
+                                   GenNativeInstruction *insn,
                                    uint32_t offset,
                                    uint32_t block_size,
                                    uint32_t channel_mode,
@@ -1317,7 +1335,7 @@ namespace gbe
   {
      assert(src_num == 1 || src_num ==2);
      uint32_t block_size = src_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
-     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
      this->setSrc0(insn, msg);
@@ -1330,7 +1348,7 @@ namespace gbe
   {
      assert(dst_num == 1 || dst_num ==2);
      uint32_t block_size = dst_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
-     GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, dst);
      this->setSrc0(insn, src);
@@ -1340,7 +1358,7 @@ namespace gbe
   }
 
   void GenEncoder::EOT(uint32_t msg) {
-    GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+    GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
     this->setSrc0(insn, GenRegister::ud8grf(msg,0));
     this->setSrc1(insn, GenRegister::immud(0));
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 50662fb..4c65a9c 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -202,11 +202,12 @@ namespace gbe
     ////////////////////////////////////////////////////////////////////////
     // Helper functions to encode
     ////////////////////////////////////////////////////////////////////////
-    void setHeader(GenInstruction *insn);
-    void setDst(GenInstruction *insn, GenRegister dest);
-    void setSrc0(GenInstruction *insn, GenRegister reg);
-    void setSrc1(GenInstruction *insn, GenRegister reg);
-    GenInstruction *next(uint32_t opcode);
+    void setHeader(GenNativeInstruction *insn);
+    void setDst(GenNativeInstruction *insn, GenRegister dest);
+    void setSrc0(GenNativeInstruction *insn, GenRegister reg);
+    void setSrc1(GenNativeInstruction *insn, GenRegister reg);
+    GenCompactInstruction *nextCompact(uint32_t opcode);
+    GenNativeInstruction *next(uint32_t opcode);
     uint32_t n_instruction(void) const { return store.size(); }
     GBE_CLASS(GenEncoder); //!< Use custom allocators
   };
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
new file mode 100644
index 0000000..5b0a897
--- /dev/null
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -0,0 +1,521 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+#include "backend/gen_defs.hpp"
+#include "backend/gen_encoder.hpp"
+#include <cstring>
+
+namespace gbe {
+
+  struct compact_table_entry { // one row of a compaction lookup table
+    uint32_t bit_pattern; // packed field bits, stored right-aligned
+    uint32_t index; // table index paired with bit_pattern
+  };
+
+  static compact_table_entry control_table[] = { // control patterns (ControlBits layout); decompactInstruction reads entry [control_index]
+    {0b0000000000000000010, 0},
+    {0b0000100000000000000, 1},
+    {0b0000100000000000001, 2},
+    {0b0000100000000000010, 3},
+    {0b0000100000000000011, 4},
+    {0b0000100000000000100, 5},
+    {0b0000100000000000101, 6},
+    {0b0000100000000000111, 7},
+    {0b0000100000000001000, 8},
+    {0b0000100000000001001, 9},
+    {0b0000100000000001101, 10},
+    {0b0000110000000000000, 11},
+    {0b0000110000000000001, 12},
+    {0b0000110000000000010, 13},
+    {0b0000110000000000011, 14},
+    {0b0000110000000000100, 15},
+    {0b0000110000000000101, 16},
+    {0b0000110000000000111, 17},
+    {0b0000110000000001001, 18},
+    {0b0000110000000001101, 19},
+    {0b0000110000000010000, 20},
+    {0b0000110000100000000, 21},
+    {0b0001000000000000000, 22},
+    {0b0001000000000000010, 23},
+    {0b0001000000000000100, 24},
+    {0b0001000000100000000, 25},
+    {0b0010110000000000000, 26},
+    {0b0010110000000010000, 27},
+    {0b0011000000000000000, 28},
+    {0b0011000000100000000, 29},
+    {0b0101000000000000000, 30},
+    {0b0101000000100000000, 31},
+  };
+
+  static compact_table_entry data_type_table[] = { // data-type patterns in ascending bit_pattern order (not index order); NOTE(review): presumably the search-side table -- confirm
+    {0b000000001000001100, 20},
+    {0b001000000000000001, 0},
+    {0b001000000000100000, 1},
+    {0b001000000000100001, 2},
+    {0b001000000000111101, 21},
+    {0b001000000001100001, 3},
+    {0b001000000010100101, 22},
+    {0b001000000010111101, 4},
+    {0b001000001011111101, 5},
+    {0b001000001110100001, 6},
+    {0b001000001110100101, 7},
+    {0b001000001110111101, 8},
+    {0b001000010000100000, 23},
+    {0b001000010000100001, 9},
+    {0b001000110000100000, 10},
+    {0b001000110000100001, 11},
+    {0b001001010010100100, 24},
+    {0b001001010010100101, 12},
+    {0b001001110010000100, 25},
+    {0b001001110010100100, 13},
+    {0b001001110010100101, 14},
+    {0b001010010100001001, 26},
+    {0b001010010100101000, 30},
+    {0b001010110100101000, 31},
+    {0b001011110110101100, 29},
+    {0b001101111110111101, 27},
+    {0b001111001110111101, 15},
+    {0b001111011110011101, 16},
+    {0b001111011110111100, 17},
+    {0b001111011110111101, 18},
+    {0b001111111110111100, 19},
+    {0b001111111110111101, 28},
+  };
+
+  static compact_table_entry data_type_decompact[] = { // data-type patterns (DataTypeBits layout) in index order; decompactInstruction reads entry [data_type_index]
+    {0b001000000000000001, 0},
+    {0b001000000000100000, 1},
+    {0b001000000000100001, 2},
+    {0b001000000001100001, 3},
+    {0b001000000010111101, 4},
+    {0b001000001011111101, 5},
+    {0b001000001110100001, 6},
+    {0b001000001110100101, 7},
+    {0b001000001110111101, 8},
+    {0b001000010000100001, 9},
+    {0b001000110000100000, 10},
+    {0b001000110000100001, 11},
+    {0b001001010010100101, 12},
+    {0b001001110010100100, 13},
+    {0b001001110010100101, 14},
+    {0b001111001110111101, 15},
+    {0b001111011110011101, 16},
+    {0b001111011110111100, 17},
+    {0b001111011110111101, 18},
+    {0b001111111110111100, 19},
+    {0b000000001000001100, 20},
+    {0b001000000000111101, 21},
+    {0b001000000010100101, 22},
+    {0b001000010000100000, 23},
+    {0b001001010010100100, 24},
+    {0b001001110010000100, 25},
+    {0b001010010100001001, 26},
+    {0b001101111110111101, 27},
+    {0b001111111110111101, 28},
+    {0b001011110110101100, 29},
+    {0b001010010100101000, 30},
+    {0b001010110100101000, 31},
+  };
+
+  static compact_table_entry subreg_table[] = { // sub-register patterns (SubRegBits layout); decompactInstruction reads entry [sub_reg_index]
+    {0b000000000000000, 0},
+    {0b000000000000001, 1},
+    {0b000000000001000, 2},
+    {0b000000000001111, 3},
+    {0b000000000010000, 4},
+    {0b000000010000000, 5},
+    {0b000000100000000, 6},
+    {0b000000110000000, 7},
+    {0b000001000000000, 8},
+    {0b000001000010000, 9},
+    {0b000001010000000, 10},
+    {0b001000000000000, 11},
+    {0b001000000000001, 12},
+    {0b001000010000001, 13},
+    {0b001000010000010, 14},
+    {0b001000010000011, 15},
+    {0b001000010000100, 16},
+    {0b001000010000111, 17},
+    {0b001000010001000, 18},
+    {0b001000010001110, 19},
+    {0b001000010001111, 20},
+    {0b001000110000000, 21},
+    {0b001000111101000, 22},
+    {0b010000000000000, 23},
+    {0b010000110000000, 24},
+    {0b011000000000000, 25},
+    {0b011110010000111, 26},
+    {0b100000000000000, 27},
+    {0b101000000000000, 28},
+    {0b110000000000000, 29},
+    {0b111000000000000, 30},
+    {0b111000000011100, 31},
+  };
+
+  static compact_table_entry srcreg_table[] = { // source-operand patterns (SrcRegBits layout); decompactInstruction reads entry [src0 index]
+    {0b000000000000, 0},
+    {0b000000000010, 1},
+    {0b000000010000, 2},
+    {0b000000010010, 3},
+    {0b000000011000, 4},
+    {0b000000100000, 5},
+    {0b000000101000, 6},
+    {0b000001001000, 7},
+    {0b000001010000, 8},
+    {0b000001110000, 9},
+    {0b000001111000, 10},
+    {0b001100000000, 11},
+    {0b001100000010, 12},
+    {0b001100001000, 13},
+    {0b001100010000, 14},
+    {0b001100010010, 15},
+    {0b001100100000, 16},
+    {0b001100101000, 17},
+    {0b001100111000, 18},
+    {0b001101000000, 19},
+    {0b001101000010, 20},
+    {0b001101001000, 21},
+    {0b001101010000, 22},
+    {0b001101100000, 23},
+    {0b001101101000, 24},
+    {0b001101110000, 25},
+    {0b001101110001, 26},
+    {0b001101111000, 27},
+    {0b010001101000, 28},
+    {0b010001101001, 29},
+    {0b010001101010, 30},
+    {0b010110001000, 31},
+  };
+
+  static int cmp_key(const void *p1, const void*p2) { // three-way compare of two table entries by bit_pattern: <0, 0 or >0
+    const compact_table_entry * px = (const compact_table_entry *)p1; // keep the const qualifier of the inputs
+    const compact_table_entry * py = (const compact_table_entry *)p2;
+    return (px->bit_pattern > py->bit_pattern) - (px->bit_pattern < py->bit_pattern); // explicit ordering; avoids narrowing an unsigned difference to int
+  }
+  union ControlBits{ // overlay for a control_table bit_pattern: the named fields alias 'data'
+    struct {
+      uint32_t access_mode:1;
+      uint32_t mask_control:1;
+      uint32_t dependency_control:2;
+      uint32_t quarter_control:2;
+      uint32_t thread_control:2;
+      uint32_t predicate_control:4;
+      uint32_t predicate_inverse:1;
+      uint32_t execution_size:3;
+      uint32_t saturate:1;
+      uint32_t flag_sub_reg_nr:1;
+      uint32_t flag_reg_nr:1;
+      uint32_t pad:13; // 19 field bits + 13 = 32, so the struct is no wider than 'data' (was 23, i.e. 42 bits)
+    };
+    uint32_t data; // raw 32-bit view of the fields above
+  };
+  union DataTypeBits{ // overlay for a data-type table bit_pattern: the named fields alias 'data'
+    struct {
+      uint32_t dest_reg_file:2;
+      uint32_t dest_reg_type:3;
+      uint32_t src0_reg_file:2;
+      uint32_t src0_reg_type:3;
+      uint32_t src1_reg_file:2;
+      uint32_t src1_reg_type:3;
+      uint32_t dest_horiz_stride:2;
+      uint32_t dest_address_mode:1;
+      uint32_t pad:14; // 18 field bits + 14 = 32
+    };
+    uint32_t data; // raw 32-bit view of the fields above
+  };
+  union SubRegBits { // overlay for a subreg_table bit_pattern: the named fields alias 'data'
+    struct {
+      uint32_t dest_subreg_nr:5;
+      uint32_t src0_subreg_nr:5;
+      uint32_t src1_subreg_nr:5;
+      uint32_t pad:17; // 15 field bits + 17 = 32
+    };
+    uint32_t data; // raw 32-bit view of the fields above
+  };
+  /* Bit-field view of one source-register table pattern: modifiers,
+   * addressing mode and region description in 12 bits, padded to 32 bits.
+   * decompactInstruction() shifts the whole pattern left by 13 before OR-ing
+   * it into the native source operand word. */
+  union SrcRegBits {
+    struct {
+      uint32_t src_abs:1;
+      uint32_t src_negate:1;
+      uint32_t src_address_mode:1;
+      uint32_t src_horiz_stride:2;
+      uint32_t src_width:3;
+      uint32_t src_vert_stride:4;
+      uint32_t pad:20;
+    };
+    // Whole pattern as a single integer; used as the table search key.
+    uint32_t data;
+  };
+
+  /* Expand the compact instruction `p` into the native instruction `pOut`.
+   * `pOut` is fully overwritten: it is zeroed first and then filled from the
+   * fields of `p` and from the patterns the compact indices select in the
+   * compaction tables.
+   * NOTE(review): control_table, subreg_table and srcreg_table are indexed
+   * directly by the compact index, which assumes the entry at position i
+   * carries index i - the full tables are not visible here, confirm. */
+  void decompactInstruction(GenCompactInstruction * p, GenNativeInstruction *pOut) {
+
+    // Start from all zeroes; several fields below are merged in with |=.
+    memset(pOut, 0, sizeof(GenNativeInstruction));
+    union ControlBits control_bits;
+    control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern;
+    // Opcode in the low bits, low 16 control bits placed from bit 8 upwards.
+    pOut->low.low = (uint32_t)p->bits1.opcode | ((control_bits.data & 0xffff) << 8);
+    pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod;
+    // saturate lies above the 16 bits copied above, so it is set separately.
+    pOut->header.saturate = control_bits.saturate;
+    pOut->header.acc_wr_control = p->bits1.acc_wr_control;
+    pOut->header.cmpt_control = p->bits1.cmpt_control;
+    pOut->header.debug_control = p->bits1.debug_control;
+
+    union DataTypeBits data_type_bits;
+    union SubRegBits subreg_bits;
+    union SrcRegBits src0_bits;
+    data_type_bits.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern;
+    subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern;
+    // The src0 index is split: low 2 bits in bits1, remaining bits in bits2.
+    src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern;
+
+    // Low 15 bits: register file and type of dest, src0 and src1.
+    pOut->low.high |= data_type_bits.data & 0x7fff;
+    pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride;
+    pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode;
+    pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr;
+    pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr;
+
+    pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr;
+    pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr;
+    // Source pattern (modifiers, address mode, region) goes in from bit 13.
+    pOut->high.low |= (src0_bits.data << 13);
+    pOut->bits2.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr;
+    pOut->bits2.da1.flag_reg_nr = control_bits.flag_reg_nr;
+
+    if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) {
+      // Immediate src1: src1_reg_nr supplies the low 8 bits and src1_index
+      // the bits above them; the value is sign-extended when bit 12 is set.
+      uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8);
+      pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm;
+    } else {
+      // Register src1: expanded the same way as src0.
+      union SrcRegBits src1_bits;
+      src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern;
+      pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr;
+      pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr;
+      pOut->high.high |= (src1_bits.data << 13);
+    }
+  }
+
+  /* Find the control-table index matching the encoder's current instruction
+   * state combined with the given quarter control and execution width.
+   * Returns the index stored in the matching table entry, or -1 when the
+   * state is rejected up front or no entry has the same bit pattern. */
+  int compactControlBits(GenEncoder *p, uint32_t quarter, uint32_t execWidth) {
+
+    const GenInstructionState *state = &p->curr;
+    // Cheap rejections before any key is built.
+    if(state->nibControl != 0 || state->predicate > GEN_PREDICATE_NORMAL || state->flag == 1)
+      return -1;
+
+    ControlBits bits;
+    bits.data = 0;
+
+    switch (execWidth) {
+      case 8:
+        bits.execution_size = GEN_WIDTH_8;
+        break;
+      case 16:
+        bits.execution_size = GEN_WIDTH_16;
+        break;
+      case 1:
+        bits.execution_size = GEN_WIDTH_1;
+        break;
+      default:
+        NOT_IMPLEMENTED;
+        break;
+    }
+
+    bits.flag_reg_nr = state->flag;
+    bits.flag_sub_reg_nr = state->subFlag;
+    bits.saturate = state->saturate;
+
+    bits.predicate_inverse = state->inversePredicate;
+    bits.predicate_control = state->predicate;
+    bits.quarter_control = quarter;
+    bits.mask_control = state->noMask;
+
+    // The assembled pattern is the search key into the sorted table.
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+
+    const size_t entryCount = sizeof(control_table)/sizeof(compact_table_entry);
+    compact_table_entry *found = (compact_table_entry *)bsearch(&wanted, control_table,
+      entryCount, sizeof(compact_table_entry), cmp_key);
+    if (found != NULL)
+      return found->index;
+    return -1;
+  }
+
+  /* Find the data-type table index describing the register file/type of
+   * dst, src0 and (optionally) src1 plus the destination stride and address
+   * mode. `src1` may be NULL, in which case its fields are encoded as zero.
+   * Returns the index stored in the matching entry, or -1 when the operands
+   * are rejected or no entry has the same bit pattern. */
+  int compactDataTypeBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
+
+    // compact does not support any indirect access, nor an immediate src0
+    if(dst->address_mode != GEN_ADDRESS_DIRECT || src0->file == GEN_IMMEDIATE_VALUE)
+      return -1;
+
+    DataTypeBits bits;
+    bits.data = 0;
+
+    bits.dest_reg_file = dst->file;
+    bits.dest_reg_type = dst->type;
+    bits.dest_address_mode = dst->address_mode;
+    // A destination stride of 0 is encoded as stride 1.
+    if(dst->hstride == GEN_HORIZONTAL_STRIDE_0)
+      bits.dest_horiz_stride = GEN_HORIZONTAL_STRIDE_1;
+    else
+      bits.dest_horiz_stride = dst->hstride;
+
+    bits.src0_reg_file = src0->file;
+    bits.src0_reg_type = src0->type;
+
+    // A missing src1 defaults to zero for both file and type.
+    const bool hasSrc1 = (src1 != NULL);
+    bits.src1_reg_type = hasSrc1 ? src1->type : 0;
+    bits.src1_reg_file = hasSrc1 ? src1->file : 0;
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+
+    const size_t entryCount = sizeof(data_type_table)/sizeof(compact_table_entry);
+    compact_table_entry *found = (compact_table_entry *)bsearch(&wanted, data_type_table,
+                                 entryCount, sizeof(compact_table_entry), cmp_key);
+    if (found != NULL)
+      return found->index;
+    return -1;
+  }
+  /* Find the sub-register table index for the sub-register numbers of dst,
+   * src0 and (optionally) src1; a NULL `src1` is encoded as sub-register 0.
+   * Returns the index stored in the matching entry, or -1 if none matches. */
+  int compactSubRegBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
+    SubRegBits bits;
+    bits.data = 0;
+    bits.dest_subreg_nr = dst->subnr;
+    bits.src0_subreg_nr = src0->subnr;
+    bits.src1_subreg_nr = src1 ? src1->subnr : 0;
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+
+    const size_t entryCount = sizeof(subreg_table)/sizeof(compact_table_entry);
+    compact_table_entry *found = (compact_table_entry *)bsearch(&wanted, subreg_table,
+                entryCount, sizeof(compact_table_entry), cmp_key);
+    if (found != NULL)
+      return found->index;
+    return -1;
+  }
+  /* Find the source-register table index for `src` (modifiers, address mode
+   * and region). Immediate and non-direct sources are rejected.
+   * Returns the index stored in the matching entry, or -1 on rejection or
+   * when no entry has the same bit pattern. */
+  int compactSrcRegBits(GenEncoder *p, GenRegister *src) {
+    // Only GEN_ALIGN_1 is used and compact encoding supports direct register
+    // access only, so beyond the modifiers just [hstride, width, vstride]
+    // have to be matched.
+    if(src->file == GEN_IMMEDIATE_VALUE || src->address_mode != GEN_ADDRESS_DIRECT)
+      return -1;
+
+    SrcRegBits bits;
+    bits.data = 0;
+    bits.src_abs = src->absolute;
+    bits.src_negate = src->negation;
+    bits.src_address_mode = src->address_mode;
+    bits.src_width = src->width;
+
+    // A width-1 source under execution width 1 is always encoded with both
+    // strides forced to 0; otherwise the register's own strides are used.
+    const bool scalarRegion = (p->curr.execWidth == 1 && src->width == GEN_WIDTH_1);
+    if(scalarRegion) {
+      bits.src_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+      bits.src_vert_stride = GEN_VERTICAL_STRIDE_0;
+    } else {
+      bits.src_horiz_stride = src->hstride;
+      bits.src_vert_stride = src->vstride;
+    }
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+
+    const size_t entryCount = sizeof(srcreg_table)/sizeof(compact_table_entry);
+    compact_table_entry *found = (compact_table_entry *)bsearch(&wanted, srcreg_table,
+                    entryCount, sizeof(compact_table_entry), cmp_key);
+    if (found != NULL)
+      return found->index;
+    return -1;
+  }
+
+  /* Try to emit a one-source ALU instruction in compact form.
+   * Returns true when a compact instruction was requested from the encoder
+   * and filled in; returns false, without requesting one, when `split` is
+   * set or any of the control / data-type / sub-register / source lookups
+   * finds no table entry. */
+  bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split) {
+    // TODO support it
+    if(split)
+      return false;
+
+    // All four lookups must succeed before an instruction slot is claimed.
+    const int controlIdx = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
+    if(controlIdx == -1)
+      return false;
+
+    const int dataTypeIdx = compactDataTypeBits(p, &dst, &src, NULL);
+    if(dataTypeIdx == -1)
+      return false;
+
+    const int subRegIdx = compactSubRegBits(p, &dst, &src, NULL);
+    if(subRegIdx == -1)
+      return false;
+
+    const int srcIdx = compactSrcRegBits(p, &src);
+    if(srcIdx == -1)
+      return false;
+
+    GenCompactInstruction *compact = p->nextCompact(opcode);
+
+    compact->bits1.control_index = controlIdx;
+    compact->bits1.data_type_index = dataTypeIdx;
+    compact->bits1.sub_reg_index = subRegIdx;
+    compact->bits1.acc_wr_control = p->curr.accWrEnable;
+    compact->bits1.destreg_or_condmod = condition;
+    compact->bits1.cmpt_control = 1;
+    // The source index is split: low 2 bits in bits1, the rest in bits2.
+    compact->bits1.src0_index_lo = srcIdx & 3;
+    compact->bits2.src0_index_hi = srcIdx >> 2;
+
+    compact->bits2.dest_reg_nr = dst.nr;
+    compact->bits2.src0_reg_nr = src.nr;
+    // There is no second source: both src1 fields stay zero.
+    compact->bits2.src1_index = 0;
+    compact->bits2.src1_reg_nr = 0;
+    return true;
+  }
+
+  /* Try to emit a two-source ALU instruction in compact form.
+   * Returns true when a compact instruction was requested from the encoder
+   * and filled in; returns false, without requesting one, when `split` is
+   * set, the opcode is IF/ENDIF/JMPI, a table lookup fails, or an immediate
+   * src1 cannot be represented (modifiers set, float type, or outside the
+   * 13-bit signed range). */
+  bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split) {
+    // TODO support it
+    if(split)
+      return false;
+
+    // These opcodes are never emitted in compact form.
+    switch (opcode) {
+      case GEN_OPCODE_IF:
+      case GEN_OPCODE_ENDIF:
+      case GEN_OPCODE_JMPI:
+        return false;
+      default:
+        break;
+    }
+
+    const int controlIdx = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
+    if(controlIdx == -1)
+      return false;
+
+    const int dataTypeIdx = compactDataTypeBits(p, &dst, &src0, &src1);
+    if(dataTypeIdx == -1)
+      return false;
+
+    const int subRegIdx = compactSubRegBits(p, &dst, &src0, &src1);
+    if(subRegIdx == -1)
+      return false;
+
+    const int src0Idx = compactSrcRegBits(p, &src0);
+    if(src0Idx == -1)
+      return false;
+
+    // src1 contributes two fields whose meaning depends on its kind:
+    //   immediate: low 8 bits go to src1_reg_nr, the 5 bits above to src1_index
+    //   register : register number and source-table index
+    uint32_t src1IndexField;
+    uint32_t src1NrField;
+    if(src1.file == GEN_IMMEDIATE_VALUE) {
+      if(src1.absolute != 0 || src1.negation != 0 || src1.type == GEN_TYPE_F)
+        return false;
+      if(src1.value.d < -4096 || src1.value.d > 4095) // 13bit signed imm
+        return false;
+      src1IndexField = (src1.value.ud & 8191) >> 8;
+      src1NrField = src1.value.ud & 0xff;
+    } else {
+      const int src1Idx = compactSrcRegBits(p, &src1);
+      if(src1Idx == -1)
+        return false;
+      src1IndexField = src1Idx;
+      src1NrField = src1.nr;
+    }
+
+    GenCompactInstruction *compact = p->nextCompact(opcode);
+
+    compact->bits1.control_index = controlIdx;
+    compact->bits1.data_type_index = dataTypeIdx;
+    compact->bits1.sub_reg_index = subRegIdx;
+    compact->bits1.acc_wr_control = p->curr.accWrEnable;
+    compact->bits1.destreg_or_condmod = condition;
+    compact->bits1.cmpt_control = 1;
+    // The src0 index is split: low 2 bits in bits1, the rest in bits2.
+    compact->bits1.src0_index_lo = src0Idx & 3;
+    compact->bits2.src0_index_hi = src0Idx >> 2;
+
+    compact->bits2.src1_index = src1IndexField;
+    compact->bits2.dest_reg_nr = dst.nr;
+    compact->bits2.src0_reg_nr = src0.nr;
+    compact->bits2.src1_reg_nr = src1NrField;
+    return true;
+  }
+};
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d0e3d0b..e04a2c2 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3089,7 +3089,7 @@ namespace gbe
       sel.push();
         sel.curr.noMask = 1;
         sel.curr.predicate = GEN_PREDICATE_NONE;
-        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1);
+        sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
       sel.pop();
 
       if (sel.block->hasBarrier) {
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 937f5b2..0794d48 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -524,7 +524,7 @@ namespace gbe
               cmp0->state.subFlag = insn.state.subFlag;
               cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
               cmp0->src(1) = GenRegister::immuw(0);
-              cmp0->dst(0) = GenRegister::null();
+              cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
               cmp0->extra.function = GEN_CONDITIONAL_NEQ;
               insn.prepend(*cmp0);
               validatedFlags.insert(insn.state.flagIndex);
@@ -545,7 +545,7 @@ namespace gbe
               cmp0->state.subFlag = insn.state.subFlag;
               cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
               cmp0->src(1) = GenRegister::immuw(0);
-              cmp0->dst(0) = GenRegister::null();
+              cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
               cmp0->extra.function = GEN_CONDITIONAL_NEQ;
               insn.prepend(*cmp0);
             }
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 0480dd8..6863aab 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -551,13 +551,13 @@ namespace gbe
 
     static INLINE GenRegister immuw(uint16_t uw) {
       GenRegister immediate = imm(GEN_TYPE_UW);
-      immediate.value.ud = uw | (uw << 16);
+      immediate.value.ud = uw;
       return immediate;
     }
 
     static INLINE GenRegister immw(int16_t w) {
       GenRegister immediate = imm(GEN_TYPE_W);
-      immediate.value.d = w | (w << 16);
+      immediate.value.d = w;
       return immediate;
     }
 
-- 
1.7.10.4



More information about the Beignet mailing list