[Beignet] [PATCH 2/3] ocl20/GBE: support generic load/store

Ruiling Song ruiling.song at intel.com
Thu Nov 26 18:09:09 PST 2015


Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 88 +++++++++++++++++++++++++++++-
 backend/src/backend/gen_register.hpp       |  6 ++
 backend/src/ir/instruction.cpp             |  1 +
 backend/src/ir/instruction.hpp             |  1 +
 backend/src/llvm/llvm_gen_backend.cpp      |  8 ++-
 5 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1d4fcda..d408a98 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3577,6 +3577,17 @@ namespace gbe
         return GEN_BYTE_SCATTER_BYTE;
     }
   }
+  ir::Register generateLocalMask(Selection::Opaque &sel, GenRegister addr) { // Emits a compare of addr against 64KB and returns the bool register used as the flag for that compare.
+    sel.push(); // paired with sel.pop() below; presumably scopes the sel.curr changes to this helper
+      ir::Register localMask = sel.reg(ir::FAMILY_BOOL); // new bool-family register that will name the flag
+      sel.curr.physicalFlag = 0; // flag is identified through flagIndex (same setup as GenInstructionState::useVirtualFlag)
+      sel.curr.modFlag = 1; // NOTE(review): presumably marks the next instruction as writing the flag -- confirm
+      sel.curr.predicate = GEN_PREDICATE_NONE; // the compare itself is not predicated
+      sel.curr.flagIndex = localMask; // bind the flag to the register returned to the caller
+      sel.CMP(GEN_CONDITIONAL_L, addr, GenRegister::immud(64*1024)); // condition: addr < 64*1024; callers predicate the BTI 0xfe access on it and the stateless access on its inverse
+    sel.pop();
+    return localMask; // callers pass this to sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL)
+  }
 
   class LoadInstructionPattern : public SelectionPattern
   {
@@ -3657,9 +3668,9 @@ namespace gbe
           sel.curr.noMask = 1;
           sel.curr.predicate = GEN_PREDICATE_NONE;
         }
+        vector<GenRegister> btiTemp = sel.getBTITemps(AM);
 
         if (AM == AM_DynamicBti || AM == AM_StaticBti) {
-          vector<GenRegister> btiTemp = sel.getBTITemps(AM);
           if (AM == AM_DynamicBti) {
             Register btiReg = insn.getBtiReg();
             sel.UNTYPED_READ(addr, dst.data(), valueNum, sel.selReg(btiReg, TYPE_U32), btiTemp);
@@ -3673,8 +3684,20 @@ namespace gbe
           GenRegister addrDW = addr;
           if (addrBytes == 8)
             addrDW = convertU64ToU32(sel, addr);
-          vector<GenRegister> btiTemp;
           sel.UNTYPED_READ(addrDW, dst.data(), valueNum, GenRegister::immud(bti), btiTemp);
+        } else if (addrSpace == ir::MEM_GENERIC) {
+          Register localMask = generateLocalMask(sel, addr);
+          sel.push();
+            sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+            GenRegister addrDW = addr;
+            if (addrBytes == 8)
+              addrDW = convertU64ToU32(sel, addr);
+            sel.UNTYPED_READ(addrDW, dst.data(), valueNum, GenRegister::immud(0xfe), btiTemp);
+
+            sel.curr.inversePredicate = 1;
+            untypedReadStateless(sel, addr, dst);
+          sel.pop();
+
         } else {
           untypedReadStateless(sel, addr, dst);
         }
@@ -3825,6 +3848,18 @@ namespace gbe
           if (addrBytes == 8)
             addrDW = convertU64ToU32(sel, addr);
           read64Legacy(sel, addrDW, dst, b, btiTemp);
+        } else if (addrSpace == ir::MEM_GENERIC) {
+          Register localMask = generateLocalMask(sel, addr);
+          sel.push();
+            sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+            GenRegister addrDW = addr;
+            if (addrBytes == 8)
+              addrDW = convertU64ToU32(sel, addr);
+            read64Legacy(sel, addrDW, dst, GenRegister::immud(0xfe), btiTemp);
+
+            sel.curr.inversePredicate = 1;
+            read64Stateless(sel, addr, dst);
+          sel.pop();
         } else {
           read64Stateless(sel, addr, dst);
         }
@@ -4036,6 +4071,18 @@ namespace gbe
         }
 
         sel.BYTE_GATHER(dst, addrDW, elemSize, GenRegister::immud(bti), btiTemp);
+      } else if (addrSpace == ir::MEM_GENERIC) {
+        Register localMask = generateLocalMask(sel, addr);
+        sel.push();
+          sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+          GenRegister addrDW = addr;
+          if (addrBytes == 8)
+            addrDW = convertU64ToU32(sel, addr);
+          sel.BYTE_GATHER(dst, addrDW, elemSize, GenRegister::immud(0xfe), btiTemp);
+
+          sel.curr.inversePredicate = 1;
+          byteGatherStateless(sel, addr, dst, elemSize);
+        sel.pop();
       } else {
         byteGatherStateless(sel, addr, dst, elemSize);
       }
@@ -4151,6 +4198,7 @@ namespace gbe
                  insn.getAddressSpace() == MEM_CONSTANT ||
                  insn.getAddressSpace() == MEM_PRIVATE ||
                  insn.getAddressSpace() == MEM_LOCAL ||
+                 insn.getAddressSpace() == MEM_GENERIC ||
                  insn.getAddressSpace() == MEM_MIXED);
       //GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
 
@@ -4281,6 +4329,18 @@ namespace gbe
           addr = convertU64ToU32(sel, address);
         }
         sel.UNTYPED_WRITE(addr, value.data(), valueNum, GenRegister::immud(0xfe), btiTemp);
+      } else if (addrSpace == ir::MEM_GENERIC) {
+        Register localMask = generateLocalMask(sel, address);
+        sel.push();
+          sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+          GenRegister addrDW = address;
+          if (addrBytes == 8)
+            addrDW = convertU64ToU32(sel, address);
+          sel.UNTYPED_WRITE(addrDW, value.data(), valueNum, GenRegister::immud(0xfe), btiTemp);
+
+          sel.curr.inversePredicate = 1;
+          untypedWriteStateless(sel, address, value);
+        sel.pop();
       } else {
         untypedWriteStateless(sel, address, value);
       }
@@ -4397,6 +4457,18 @@ namespace gbe
           addr = convertU64ToU32(sel, address);
         }
         write64Legacy(sel, addr, src, b, btiTemp);
+      } else if (addrSpace == ir::MEM_GENERIC) {
+        Register localMask = generateLocalMask(sel, address);
+        sel.push();
+          sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+          GenRegister addrDW = address;
+          if (addrBytes == 8)
+            addrDW = convertU64ToU32(sel, address);
+          write64Legacy(sel, addrDW, src, GenRegister::immud(0xfe), btiTemp);
+
+          sel.curr.inversePredicate = 1;
+          write64Stateless(sel, address, src);
+        sel.pop();
       } else {
         GBE_ASSERT(sel.hasLongType());
         write64Stateless(sel, address, src);
@@ -4466,6 +4538,18 @@ namespace gbe
           addr = convertU64ToU32(sel, address);
         }
         sel.BYTE_SCATTER(addr, data, elemSize, GenRegister::immud(0xfe), btiTemp);
+      } else if (addrSpace == ir::MEM_GENERIC) {
+        Register localMask = generateLocalMask(sel, address);
+        sel.push();
+          sel.curr.useVirtualFlag(localMask, GEN_PREDICATE_NORMAL);
+          GenRegister addrDW = address;
+          if (addrBytes == 8)
+            addrDW = convertU64ToU32(sel, address);
+          sel.BYTE_SCATTER(addrDW, data, elemSize, GenRegister::immud(0xfe), btiTemp);
+
+          sel.curr.inversePredicate = 1;
+          byteScatterStateless(sel, address, data, elemSize);
+        sel.pop();
       } else {
         byteScatterStateless(sel, address, data, elemSize);
       }
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index bafff29..f26dec4 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -169,6 +169,12 @@ namespace gbe
           NOT_IMPLEMENTED;
       }
     }
+    void useVirtualFlag(ir::Register flag, unsigned pred) { // Set this state to predicate on the flag named by an ir::Register, using predicate mode pred.
+      modFlag = 0; // NOTE(review): presumably means the flag is only read, not written, by following instructions -- confirm
+      physicalFlag = 0; // flag is identified by flagIndex below rather than by useFlag()'s flag/subFlag numbers
+      flagIndex = flag; // register naming the flag to predicate on
+      predicate = pred; // predicate mode; callers in this patch pass GEN_PREDICATE_NORMAL
+    }
     void useFlag(int nr, int subnr) {
       flag = nr;
       subFlag = subnr;
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 0246920..c1dbaa1 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1415,6 +1415,7 @@ namespace ir {
       case MEM_CONSTANT: return out << "constant";
       case MEM_PRIVATE: return out << "private";
       case MEM_MIXED: return out << "mixed";
+      case MEM_GENERIC: return out << "generic";
       case MEM_INVALID: return out << "invalid";
     };
     return out;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index b8f95ba..09861b0 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -62,6 +62,7 @@ namespace ir {
     MEM_CONSTANT,   //!< Immutable global memory
     MEM_PRIVATE,    //!< Per thread private memory
     MEM_MIXED,      //!< mixed address space pointer.
+    MEM_GENERIC,      //!< generic address space pointer (OpenCL 2.0).
     MEM_INVALID
   };
 
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 0e2b809..c1c8097 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -211,6 +211,7 @@ namespace gbe
       case 1: return ir::MEM_GLOBAL;
       case 2: return ir::MEM_CONSTANT;
       case 3: return ir::MEM_LOCAL;
+      case 4: return ir::MEM_GENERIC;
     }
     GBE_ASSERT(false);
     return ir::MEM_GLOBAL;
@@ -3097,7 +3098,10 @@ namespace gbe
       case Instruction::FPTrunc:
       case Instruction::Trunc:
         this->newRegister(&I);
-      break;
+        break;
+      case Instruction::AddrSpaceCast:
+        regTranslator.newValueProxy(srcValue, dstValue);
+        break;
       default: NOT_SUPPORTED;
     }
   }
@@ -3105,6 +3109,8 @@ namespace gbe
   void GenWriter::emitCastInst(CastInst &I) {
     switch (I.getOpcode())
     {
+      case Instruction::AddrSpaceCast:
+        break;
       case Instruction::PtrToInt:
       case Instruction::IntToPtr:
       {
-- 
2.4.1



More information about the Beignet mailing list