[Beignet] [Patch V2 1/3] OCL20/GBE: Fix 64bit pointer issue in Load store instruction selection.

Yang Rong rong.r.yang at intel.com
Fri Dec 4 00:30:30 PST 2015


From: Ruiling Song <ruiling.song at intel.com>

Previously, we did not handle 64-bit pointers correctly.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 79 ++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 20 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 7a23892..1d4fcda 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3720,7 +3720,13 @@ namespace gbe
         if (sel.isScalarReg(addr.reg())) {
           sel.curr.noMask = 1;
         }
-        sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
+        if (sel.getRegisterFamily(addr.reg())) {
+          // as we still use an offset instead of an absolute graphics address,
+          // it is safe to convert from u64 to u32
+          GenRegister t = convertU64ToU32(sel, addr);
+          sel.SHR(addrDW, t, GenRegister::immud(2));
+        } else
+          sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
       sel.pop();
 
       sel.DWORD_GATHER(dst, addrDW, BTI_CONSTANT);
@@ -3797,6 +3803,7 @@ namespace gbe
         dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
 
       bool isUniform = sel.isScalarReg(insn.getValue(0));
+      unsigned addrBytes = typeSize(addr.type);
       AddressMode AM = insn.getAddressMode();
       vector<GenRegister> btiTemp = sel.getBTITemps(AM);
       sel.push();
@@ -3814,7 +3821,10 @@ namespace gbe
           read64Legacy(sel, addr, dst, b, btiTemp);
         } else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) {
           GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? 0xfe : BTI_CONSTANT);
-          read64Legacy(sel, addr, dst, b, btiTemp);
+          GenRegister addrDW = addr;
+          if (addrBytes == 8)
+            addrDW = convertU64ToU32(sel, addr);
+          read64Legacy(sel, addrDW, dst, b, btiTemp);
         } else {
           read64Stateless(sel, addr, dst);
         }
@@ -3830,9 +3840,12 @@ namespace gbe
                         ir::AddressSpace addrSpace) const
     {
       using namespace ir;
-        Register tmpReg = sel.reg(FAMILY_DWORD);
-        GenRegister tmpAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+        RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+        Type addrType = getType(addrFamily);
+        Register tmpReg = sel.reg(FAMILY_DWORD, isUniform);
+        GenRegister tmpAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType);
         GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
+        GenRegister addrOffset = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
 
         // Get dword aligned addr
         sel.push();
@@ -3840,7 +3853,11 @@ namespace gbe
             sel.curr.noMask = 1;
             sel.curr.execWidth = 1;
           }
-          sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+          if (addrFamily == FAMILY_DWORD)
+            sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+          else
+            sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UL), GenRegister::immuint64(0xfffffffffffffffc));
+
         sel.pop();
         sel.push();
           vector<GenRegister> tmp;
@@ -3852,9 +3869,13 @@ namespace gbe
           if (isUniform)
             sel.curr.execWidth = 1;
           // Get the remaining offset from aligned addr
-          sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
-          sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
-          sel.SHR(tmpData, tmpData, tmpAddr);
+          if (addrFamily == FAMILY_QWORD) {
+            sel.AND(addrOffset, sel.unpacked_ud(address.reg()), GenRegister::immud(0x3));
+          } else {
+            sel.AND(addrOffset, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+          }
+          sel.SHL(addrOffset, addrOffset, GenRegister::immud(0x3));
+          sel.SHR(tmpData, tmpData, addrOffset);
 
           if (elemSize == GEN_BYTE_SCATTER_WORD)
             sel.MOV(GenRegister::retype(dst, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg, isUniform, sel.isLongReg(tmpReg)));
@@ -3908,6 +3929,7 @@ namespace gbe
       using namespace ir;
       GBE_ASSERT(effectData.size() == effectDataNum);
       GBE_ASSERT(tmp.size() == effectDataNum + 1);
+      RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
       sel.push();
         Register alignedFlag = sel.reg(FAMILY_BOOL, isUniform);
         GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
@@ -3916,7 +3938,12 @@ namespace gbe
         sel.push();
           if (isUniform)
             sel.curr.noMask = 1;
-          sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3));
+          if (addrFamily == FAMILY_QWORD) {
+            GenRegister t = convertU64ToU32(sel, address);
+            sel.AND(shiftL, t, GenRegister::immud(0x3));
+          } else {
+            sel.AND(shiftL, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+          }
           sel.SHL(shiftL, shiftL, GenRegister::immud(0x3));
           sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32));
           sel.curr.physicalFlag = 0;
@@ -4026,6 +4053,8 @@ namespace gbe
                                  1 : sel.ctx.getSimdWidth();
       const bool isUniform = simdWidth == 1;
       RegisterFamily family = getFamily(insn.getValueType());
+      RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+      Type addrType = getType(addrFamily);
 
       if(valueNum > 1) {
         GBE_ASSERT(!isUniform && "vector load should not be uniform. Something went wrong.");
@@ -4041,11 +4070,14 @@ namespace gbe
         for(uint32_t i = 0; i < effectDataNum + 1; i++)
           tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
 
-        GenRegister alignedAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+        GenRegister alignedAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType);
         sel.push();
           if (isUniform)
             sel.curr.noMask = 1;
-          sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+          if (addrFamily == FAMILY_DWORD)
+            sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+          else
+            sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UL), GenRegister::immuint64(~0x3ul));
         sel.pop();
 
         uint32_t remainedReg = effectDataNum + 1;
@@ -4057,7 +4089,10 @@ namespace gbe
             sel.push();
               if (isUniform)
                 sel.curr.noMask = 1;
-              sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+              if (addrFamily == FAMILY_DWORD)
+                sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+              else
+                sel.ADD(alignedAddr, alignedAddr, GenRegister::immuint64(pos * 4));
             sel.pop();
           }
           shootUntypedReadMsg(sel, insn, t1, alignedAddr, width, addrSpace);
@@ -4110,7 +4145,8 @@ namespace gbe
     {
       using namespace ir;
       const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn);
-      GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+      Register reg = insn.getAddressRegister();
+      GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
       GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
                  insn.getAddressSpace() == MEM_CONSTANT ||
                  insn.getAddressSpace() == MEM_PRIVATE ||
@@ -4198,7 +4234,6 @@ namespace gbe
         for (unsigned k = 0; k < (valueNum+1)/2+1; k++) {
           msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
         }
-        bool valueScalar = sel.isScalarReg(value[0].reg());
         sel.push();
           /* do first quarter */
           sel.curr.execWidth = 8;
@@ -4232,8 +4267,6 @@ namespace gbe
       AddressMode AM = insn.getAddressMode();
       vector<GenRegister> btiTemp = sel.getBTITemps(AM);
 
-      bool addrScalar = sel.isScalarReg(address.reg());
-
       if (AM == AM_DynamicBti || AM == AM_StaticBti) {
         if (AM == AM_DynamicBti) {
           Register btiReg = insn.getBtiReg();
@@ -4347,6 +4380,7 @@ namespace gbe
         src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
 
       AddressMode AM = insn.getAddressMode();
+      unsigned int addrBytes = typeSize(address.type);
       vector<GenRegister> btiTemp = sel.getBTITemps(AM);
       if (AM != AM_Stateless) {
         GenRegister b;
@@ -4356,9 +4390,13 @@ namespace gbe
           b = GenRegister::immud(insn.getSurfaceIndex());
         }
         write64Legacy(sel, address, src, b, btiTemp);
-      } else if (addrSpace == MEM_CONSTANT || addrSpace == MEM_LOCAL) {
-        GenRegister b = GenRegister::immud(addrSpace == MEM_CONSTANT ? BTI_CONSTANT : 0xfe);
-        write64Legacy(sel, address, src, b, btiTemp);
+      } else if (addrSpace == MEM_LOCAL) {
+        GenRegister b = GenRegister::immud(0xfe);
+        GenRegister addr = address;
+        if (addrBytes == 8) {
+          addr = convertU64ToU32(sel, address);
+        }
+        write64Legacy(sel, addr, src, b, btiTemp);
       } else {
         GBE_ASSERT(sel.hasLongType());
         write64Stateless(sel, address, src);
@@ -4480,7 +4518,8 @@ namespace gbe
     {
       using namespace ir;
       const ir::StoreInstruction &insn = cast<ir::StoreInstruction>(dag.insn);
-      GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+      Register reg = insn.getAddressRegister();
+      GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
       AddressSpace addrSpace = insn.getAddressSpace();
       const Type type = insn.getValueType();
       const uint32_t elemSize = getByteScatterGatherSize(sel, type);
-- 
1.9.1



More information about the Beignet mailing list