[Beignet] [PATCH] generate MOV instruction at selection stage when doing simd_shuffle with an imm value.

Guo Yejun yejun.guo at intel.com
Thu Aug 27 16:05:56 PDT 2015


The earlier the instruction is generated, the more optimizations
can be applied to it.

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen8_context.cpp       | 22 ++++++--------
 backend/src/backend/gen_context.cpp        | 46 +++++++++++++-----------------
 backend/src/backend/gen_insn_selection.cpp | 10 +++++--
 backend/src/backend/gen_register.hpp       |  8 ++++++
 4 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index b497ee5..f02786c 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -259,20 +259,14 @@ namespace gbe
     const GenRegister src0 = ra->genReg(insn.src(0));
     const GenRegister src1 = ra->genReg(insn.src(1));
     assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
-
-    uint32_t simd = p->curr.execWidth;
-    if (src1.file == GEN_IMMEDIATE_VALUE) {
-      uint32_t offset = src1.value.ud % simd;
-      GenRegister reg = GenRegister::suboffset(src0, offset);
-      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-    } else {
-      uint32_t base = src0.nr * 32 + src0.subnr * 4;
-      GenRegister baseReg = GenRegister::immuw(base);
-      const GenRegister a0 = GenRegister::addr8(0);
-      p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-      GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-      p->MOV(dst, indirect);
-    }
+    assert (src1.file != GEN_IMMEDIATE_VALUE);
+
+    uint32_t base = src0.nr * 32 + src0.subnr * 4;
+    GenRegister baseReg = GenRegister::immuw(base);
+    const GenRegister a0 = GenRegister::addr8(0);
+    p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+    GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+    p->MOV(dst, indirect);
   }
 
   void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 25fdf08..c2be7aa 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -585,35 +585,29 @@ namespace gbe
     const GenRegister src0 = ra->genReg(insn.src(0));
     const GenRegister src1 = ra->genReg(insn.src(1));
     assert(insn.opcode == SEL_OP_SIMD_SHUFFLE);
+    assert (src1.file != GEN_IMMEDIATE_VALUE);
 
+    uint32_t base = src0.nr * 32 + src0.subnr * 4;
+    GenRegister baseReg = GenRegister::immuw(base);
+    const GenRegister a0 = GenRegister::addr8(0);
     uint32_t simd = p->curr.execWidth;
-    if (src1.file == GEN_IMMEDIATE_VALUE) {
-      uint32_t offset = src1.value.ud % simd;
-      GenRegister reg = GenRegister::suboffset(src0, offset);
-      p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type));
-    } else {
-      uint32_t base = src0.nr * 32 + src0.subnr * 4;
-      GenRegister baseReg = GenRegister::immuw(base);
-      const GenRegister a0 = GenRegister::addr8(0);
+    p->push();
+      if (simd == 8) {
+        p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+        GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+        p->MOV(dst, indirect);
+      } else if (simd == 16) {
+        p->curr.execWidth = 8;
+        p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+        GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+        p->MOV(dst, indirect);
 
-      p->push();
-        if (simd == 8) {
-          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-          p->MOV(dst, indirect);
-        } else if (simd == 16) {
-          p->curr.execWidth = 8;
-          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
-          p->MOV(dst, indirect);
-
-          p->curr.quarterControl = 1;
-          p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
-          p->MOV(GenRegister::offset(dst, 1, 0), indirect);
-        } else
-          NOT_IMPLEMENTED;
-      p->pop();
-    }
+        p->curr.quarterControl = 1;
+        p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg);
+        p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+      } else
+        NOT_IMPLEMENTED;
+    p->pop();
   }
 
   void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ab00269..88d6dcc 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -5033,8 +5033,14 @@ namespace gbe
         sel.curr.predicate = GEN_PREDICATE_NONE;
         sel.curr.noMask = 1;
       }
-      if (src1.file == GEN_IMMEDIATE_VALUE)
-        sel.SIMD_SHUFFLE(dst, src0, src1);
+      if (src1.file == GEN_IMMEDIATE_VALUE) {
+        uint32_t offset = src1.value.ud % sel.curr.execWidth;
+        GenRegister reg = GenRegister::subphysicaloffset(src0, offset);
+        reg.vstride = GEN_VERTICAL_STRIDE_0;
+        reg.hstride = GEN_HORIZONTAL_STRIDE_0;
+        reg.width = GEN_WIDTH_1;
+        sel.MOV(dst, reg);
+      }
       else {
         GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
         sel.SHL(shiftL, src1, GenRegister::immud(0x2));
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 4f37e30..a63d693 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -946,6 +946,14 @@ namespace gbe
       return reg;
     }
 
+    static INLINE GenRegister subphysicaloffset(GenRegister reg, uint32_t delta) {
+      if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+        reg.subnr += delta * typeSize(reg.type) * hstride_size(reg);
+        reg.subphysical = 1;
+      }
+      return reg;
+    }
+
     static INLINE GenRegister df16(uint32_t file, uint32_t nr, uint32_t subnr) {
       return retype(vec16(file, nr, subnr), GEN_TYPE_DF);
     }
-- 
1.9.1



More information about the Beignet mailing list