[Beignet] [PATCH 2/2] GBE: refine the unalgined data gathering.

Song, Ruiling ruiling.song at intel.com
Tue Sep 2 19:21:35 PDT 2014


This patchset LGTM. But a typo in subject "unalgined" should be "unaligned". And I think it is better to keep the piece of code still in getEffectByteData() now.

-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Thursday, August 28, 2014 10:46 AM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH 2/2] GBE: refine the unalgined data gathering.

Save some unecessary duplicate instructions.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 43 +++++++++++++++++-------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1258e54..4be6372 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2888,7 +2888,9 @@ namespace gbe
                            vector<GenRegister> &effectData,
                            vector<GenRegister> &tmp,
                            uint32_t effectDataNum,
-                           GenRegister addr,
+                           GenRegister shiftL,
+                           GenRegister shiftH,
+                           ir::Register alignedFlag,
                            uint32_t simdWidth) const
     {
       using namespace ir;
@@ -2899,25 +2901,14 @@ namespace gbe
         for(uint32_t i = 0; i < effectDataNum; i++) {
           GenRegister tmpH = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
           GenRegister tmpL = effectData[i];
-          GenRegister shift = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
-          Register shift1Reg = sel.reg(FAMILY_DWORD);
-          GenRegister shift1 = GenRegister::udxgrf(simdWidth, shift1Reg);
-          GenRegister factor = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
-          sel.AND(shift, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(0x3));
-          sel.SHL(shift, shift, GenRegister::immud(0x3));
-          sel.SHR(tmpL, tmp[i], shift);
-          sel.ADD(shift1, GenRegister::negate(shift), GenRegister::immud(32));
+          sel.SHR(tmpL, tmp[i], shiftL);
           sel.push();
-            // Only need to consider the tmpH when the shift is not 32.
-            Register flag = sel.reg(FAMILY_BOOL);
-            sel.curr.physicalFlag = 0;
-            sel.curr.modFlag = 1;
-            sel.curr.predicate = GEN_PREDICATE_NONE;
-            sel.curr.flagIndex = (uint16_t)flag;
-            sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shift1Reg), GenRegister::immuw(32), factor);
+            // Only need to consider the tmpH when the addr is not aligned.
             sel.curr.modFlag = 0;
+            sel.curr.physicalFlag = 0;
+            sel.curr.flagIndex = (uint16_t)alignedFlag;
             sel.curr.predicate = GEN_PREDICATE_NORMAL;
-            sel.SHL(tmpH, tmp[i + 1], shift1);
+            sel.SHL(tmpH, tmp[i + 1], shiftH);
             sel.OR(effectData[i], tmpL, tmpH);
           sel.pop();
         }
@@ -2978,7 +2969,23 @@ namespace gbe
         for(uint32_t i = 0; i < effectDataNum; i++)
           effectData[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
 
-        getEffectByteData(sel, effectData, tmp, effectDataNum, address, simdWidth);
+        Register alignedFlag = sel.reg(FAMILY_BOOL);
+        GenRegister shiftL = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+        Register shiftHReg = sel.reg(FAMILY_DWORD);
+        GenRegister shiftH = GenRegister::udxgrf(simdWidth, shiftHReg);
+        sel.push();
+          if (simdWidth == 1)
+            sel.curr.noMask = 1;
+          sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3));
+          sel.SHL(shiftL, shiftL, GenRegister::immud(0x3));
+          sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32));
+          sel.curr.physicalFlag = 0;
+          sel.curr.modFlag = 1;
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          sel.curr.flagIndex = (uint16_t)alignedFlag;
+          sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shiftHReg), GenRegister::immuw(32));
+        sel.pop();
+        getEffectByteData(sel, effectData, tmp, effectDataNum, shiftL, shiftH, alignedFlag, simdWidth);
 
         for(uint32_t i = 0; i < effectDataNum; i++) {
           unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ?
-- 
1.8.3.2

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list