[Beignet] [PATCH 2/3] Add the support for 1D image in backend

junyan.he at inbox.com junyan.he at inbox.com
Thu May 15 01:43:30 PDT 2014


From: Junyan He <junyan.he at linux.intel.com>

1. Delete the is3D member from the instruction classes, because we need
   more than 1 bit to represent 1D, 2D and 3D. We now add an invalid
   register to the IR profile and compare the coordinates against it to
   determine the dimension.
2. Rename all the xxx_image to xxx_image2D to make their meaning clear.
3. Update the corresponding Sampler and Typed_Write instructions in the
   selection and Gen IR generation.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  27 +++--
 backend/src/ir/instruction.cpp             |  21 ++--
 backend/src/ir/instruction.hpp             |   6 +-
 backend/src/ir/profile.cpp                 |   1 +
 backend/src/ir/profile.hpp                 |   3 +-
 backend/src/ir/register.cpp                |   2 +
 backend/src/ir/register.hpp                |   3 +
 backend/src/llvm/llvm_gen_backend.cpp      | 162 ++++++++++++++++++++---------
 backend/src/llvm/llvm_gen_ocl_function.hxx |  28 +++--
 backend/src/llvm/llvm_scalarize.cpp        |  31 ++++--
 backend/src/ocl_stdlib.tmpl.h              |  82 +++++++++++++--
 11 files changed, 257 insertions(+), 109 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 88ec408..a7a7982 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3265,15 +3265,19 @@ namespace gbe
       for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
         dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
 
-      if (!insn.is3D())
-        srcNum--;
+      GBE_ASSERT(srcNum == 3);
+      if (insn.getSrc(1) == ir::InvalidRegister) //not 3D
+        srcNum = 1;
+      else if (insn.getSrc(2) == ir::InvalidRegister)
+        srcNum = 2;
 
       if (insn.getSamplerOffset() != 0) {
-        // U, lod, V, [W]
+        // U, lod, [V], [W]
         GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT);
         msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType());
         msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-        msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
+        if (srcNum > 1)
+          msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
         if (srcNum > 2)
           msgPayloads[3] = sel.selReg(insn.getSrc(2), insn.getSrcType());
         // Clear the lod to zero.
@@ -3314,8 +3318,12 @@ namespace gbe
         msgs[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
         for(uint32_t msgID = 1; msgID < 1 + coordNum; msgID++, valueID++)
           msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType());
+
+        // fake v.
+        if (insn.getSrc(1) == ir::InvalidRegister)
+          msgs[2] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
         // fake w.
-        if (!insn.is3D())
+        if (insn.getSrc(2) == ir::InvalidRegister)
           msgs[3] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
         // LOD.
         msgs[4] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
@@ -3343,7 +3351,7 @@ namespace gbe
 
       uint32_t bti = insn.getImageIndex();
       if (simdWidth == 8)
-        sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D());
+        sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::InvalidRegister);
       else {
         sel.push();
         sel.curr.execWidth = 8;
@@ -3359,15 +3367,16 @@ namespace gbe
           sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : GEN_COMPRESSION_Q2;
           // Set U,V,W
           QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), insn.getCoordType()));
-          QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType()));
-          if (insn.is3D())
+	  if (insn.getSrc(1) != ir::InvalidRegister) //not 2D
+            QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType()));
+          if (insn.getSrc(2) != ir::InvalidRegister) //not 3D
             QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), insn.getCoordType()));
           // Set R, G, B, A
           QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(3), insn.getSrcType()));
           QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(4), insn.getSrcType()));
           QUARTER_MOV1(msgs, 7, sel.selReg(insn.getSrc(5), insn.getSrcType()));
           QUARTER_MOV1(msgs, 8, sel.selReg(insn.getSrc(6), insn.getSrcType()));
-          sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D());
+          sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::InvalidRegister);
           #undef QUARTER_MOV0
           #undef QUARTER_MOV1
         }
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 2d2b34b..b351324 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -491,7 +491,7 @@ namespace ir {
       public TupleDstPolicy<SampleInstruction>
     {
     public:
-      SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D) {
+      SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) {
         this->opcode = OP_SAMPLE;
         this->dst = dstTuple;
         this->src = srcTuple;
@@ -500,7 +500,6 @@ namespace ir {
         this->samplerIdx = sampler;
         this->imageIdx = imageIdx;
         this->samplerOffset = samplerOffset;
-        this->is3DRead = is3D;
       }
       INLINE bool wellFormed(const Function &fn, std::string &why) const;
       INLINE void out(std::ostream &out, const Function &fn) const {
@@ -525,12 +524,10 @@ namespace ir {
       INLINE Type getDstType(void) const { return this->dstIsFloat ? TYPE_FLOAT : TYPE_U32; }
       INLINE const uint8_t getSamplerIndex(void) const { return this->samplerIdx; }
       INLINE const uint8_t getSamplerOffset(void) const { return this->samplerOffset; }
-      INLINE const bool is3D(void) const { return !!this->is3DRead; }
       uint8_t srcIsFloat:1;
       uint8_t dstIsFloat:1;
       uint8_t samplerIdx:4;
       uint8_t samplerOffset:1;
-      uint8_t is3DRead:1;
       uint8_t imageIdx;
       static const uint32_t srcNum = 3;
       static const uint32_t dstNum = 4;
@@ -543,13 +540,12 @@ namespace ir {
     {
     public:
 
-      INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type srcType, Type coordType, bool is3D) {
+      INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type srcType, Type coordType) {
         this->opcode = OP_TYPED_WRITE;
         this->src = srcTuple;
         this->coordType = coordType;
         this->srcType = srcType;
         this->imageIdx = imageIdx;
-        this->is3DWrite = is3D;
       }
       INLINE bool wellFormed(const Function &fn, std::string &why) const;
       INLINE void out(std::ostream &out, const Function &fn) const {
@@ -569,9 +565,6 @@ namespace ir {
       uint8_t srcType;
       uint8_t coordType;
       uint8_t imageIdx;
-      uint8_t is3DWrite;
-
-      INLINE const bool is3D(void) const { return !!this->is3DWrite; }
 
       INLINE const uint8_t getImageIndex(void) const { return this->imageIdx; }
       INLINE Type getSrcType(void) const { return (Type)this->srcType; }
@@ -1452,13 +1445,11 @@ DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
 DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
 DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType())
 DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerIndex(void), getSamplerIndex())
-DECL_MEM_FN(SampleInstruction, const bool, is3D(void), is3D())
 DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerOffset(void), getSamplerOffset())
 DECL_MEM_FN(SampleInstruction, const uint8_t, getImageIndex(void), getImageIndex())
 DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType())
 DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType())
 DECL_MEM_FN(TypedWriteInstruction, const uint8_t, getImageIndex(void), getImageIndex())
-DECL_MEM_FN(TypedWriteInstruction, const bool, is3D(void), is3D())
 DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
 DECL_MEM_FN(GetImageInfoInstruction, const uint8_t, getImageIndex(void), getImageIndex())
 
@@ -1638,12 +1629,12 @@ DECL_MEM_FN(GetImageInfoInstruction, const uint8_t, getImageIndex(void), getImag
   }
 
   // SAMPLE
-  Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D) {
-    return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, srcIsFloat, sampler, samplerOffset, is3D).convert();
+  Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) {
+    return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, srcIsFloat, sampler, samplerOffset).convert();
   }
 
-  Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType, bool is3D) {
-    return internal::TypedWriteInstruction(imageIndex, src, srcType, coordType, is3D).convert();
+  Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType) {
+    return internal::TypedWriteInstruction(imageIndex, src, srcType, coordType).convert();
   }
 
   Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg) {
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 582e22d..a29a734 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -351,7 +351,6 @@ namespace ir {
   class TypedWriteInstruction : public Instruction {
   public:
     /*! Return true if the given instruction is an instance of this class */
-    const bool is3D() const;
     static bool isClassOf(const Instruction &insn);
     const uint8_t getImageIndex() const;
     Type getSrcType(void) const;
@@ -361,7 +360,6 @@ namespace ir {
   /*! Load texels from a texture */
   class SampleInstruction : public Instruction {
   public:
-    const bool is3D() const;
     const uint8_t getImageIndex() const;
     const uint8_t getSamplerIndex(void) const;
     const uint8_t getSamplerOffset(void) const;
@@ -662,9 +660,9 @@ namespace ir {
   /*! sync.params... (see Sync instruction) */
   Instruction SYNC(uint32_t parameters);
   /*! typed write */
-  Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType, bool is3D);
+  Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType);
   /*! sample textures */
-  Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D);
+  Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset);
   /*! get image information , such as width/height/depth/... */
   Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg);
   /*! label labelIndex */
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index ddf53d1..4fbf87e 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -81,6 +81,7 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, zero, 1);
       DECL_NEW_REG(FAMILY_DWORD, one, 1);
       DECL_NEW_REG(FAMILY_WORD, retVal, 1);
+      DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
     }
 #undef DECL_NEW_REG
 
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 0652a25..81547d8 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -68,7 +68,8 @@ namespace ir {
     static const Register zero = Register(24);     //  scalar register holds zero.
     static const Register one = Register(25);     //  scalar register holds one. 
     static const Register retVal = Register(26);   // helper register to do data flow analysis.
-    static const uint32_t regNum = 27;             // number of special registers
+    static const Register invalid = Register(27);  // used for valid comparation.
+    static const uint32_t regNum = 28;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp
index 471bfbd..aed3e46 100644
--- a/backend/src/ir/register.cpp
+++ b/backend/src/ir/register.cpp
@@ -27,6 +27,8 @@
 namespace gbe {
 namespace ir {
 
+  const Register InvalidRegister = Register(27);
+
   std::ostream &operator<< (std::ostream &out, const RegisterData &regData)
   {
     switch (regData.family) {
diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp
index 340ebc8..ba15b2d 100644
--- a/backend/src/ir/register.hpp
+++ b/backend/src/ir/register.hpp
@@ -161,6 +161,9 @@ namespace ir {
   /*! Output the register file string in the given stream */
   std::ostream &operator<< (std::ostream &out, const RegisterFile &file);
 
+  /*! The invalid register, used for comparison. */
+  extern const Register InvalidRegister;
+
 } /* namespace ir */
 } /* namespace gbe */
 
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 8489c87..c23fd49 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2205,22 +2205,32 @@ namespace gbe
       case GEN_OCL_LGBARRIER:
         ctx.getFunction().setUseSLM(true);
         break;
-      case GEN_OCL_WRITE_IMAGE_I:
-      case GEN_OCL_WRITE_IMAGE_UI:
-      case GEN_OCL_WRITE_IMAGE_F:
+      case GEN_OCL_WRITE_IMAGE_I_1D:
+      case GEN_OCL_WRITE_IMAGE_UI_1D:
+      case GEN_OCL_WRITE_IMAGE_F_1D:
+      case GEN_OCL_WRITE_IMAGE_I_2D:
+      case GEN_OCL_WRITE_IMAGE_UI_2D:
+      case GEN_OCL_WRITE_IMAGE_F_2D:
       case GEN_OCL_WRITE_IMAGE_I_3D:
       case GEN_OCL_WRITE_IMAGE_UI_3D:
       case GEN_OCL_WRITE_IMAGE_F_3D:
         break;
-      case GEN_OCL_READ_IMAGE_I:
-      case GEN_OCL_READ_IMAGE_UI:
-      case GEN_OCL_READ_IMAGE_F:
+      case GEN_OCL_READ_IMAGE_I_1D:
+      case GEN_OCL_READ_IMAGE_UI_1D:
+      case GEN_OCL_READ_IMAGE_F_1D:
+      case GEN_OCL_READ_IMAGE_I_2D:
+      case GEN_OCL_READ_IMAGE_UI_2D:
+      case GEN_OCL_READ_IMAGE_F_2D:
       case GEN_OCL_READ_IMAGE_I_3D:
       case GEN_OCL_READ_IMAGE_UI_3D:
       case GEN_OCL_READ_IMAGE_F_3D:
-      case GEN_OCL_READ_IMAGE_I_I:
-      case GEN_OCL_READ_IMAGE_UI_I:
-      case GEN_OCL_READ_IMAGE_F_I:
+
+      case GEN_OCL_READ_IMAGE_I_1D_I:
+      case GEN_OCL_READ_IMAGE_UI_1D_I:
+      case GEN_OCL_READ_IMAGE_F_1D_I:
+      case GEN_OCL_READ_IMAGE_I_2D_I:
+      case GEN_OCL_READ_IMAGE_UI_2D_I:
+      case GEN_OCL_READ_IMAGE_F_2D_I:
       case GEN_OCL_READ_IMAGE_I_3D_I:
       case GEN_OCL_READ_IMAGE_UI_3D_I:
       case GEN_OCL_READ_IMAGE_F_3D_I:
@@ -2401,6 +2411,7 @@ namespace gbe
           default: NOT_IMPLEMENTED;
         }
       } else {
+        int image_dim;
         // Get the name of the called function and handle it
         Value *Callee = I.getCalledValue();
         const std::string fnName = Callee->getName();
@@ -2504,18 +2515,31 @@ namespace gbe
             ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
             break;
           }
-          case GEN_OCL_READ_IMAGE_I:
-          case GEN_OCL_READ_IMAGE_UI:
-          case GEN_OCL_READ_IMAGE_F:
+
+          case GEN_OCL_READ_IMAGE_I_1D:
+          case GEN_OCL_READ_IMAGE_UI_1D:
+          case GEN_OCL_READ_IMAGE_F_1D:
+          case GEN_OCL_READ_IMAGE_I_1D_I:
+          case GEN_OCL_READ_IMAGE_UI_1D_I:
+          case GEN_OCL_READ_IMAGE_F_1D_I:
+            image_dim = 1;
+            goto handle_read_image;
+          case GEN_OCL_READ_IMAGE_I_2D:
+          case GEN_OCL_READ_IMAGE_UI_2D:
+          case GEN_OCL_READ_IMAGE_F_2D:
+          case GEN_OCL_READ_IMAGE_I_2D_I:
+          case GEN_OCL_READ_IMAGE_UI_2D_I:
+          case GEN_OCL_READ_IMAGE_F_2D_I:
+            image_dim = 2;
+            goto handle_read_image;
           case GEN_OCL_READ_IMAGE_I_3D:
           case GEN_OCL_READ_IMAGE_UI_3D:
           case GEN_OCL_READ_IMAGE_F_3D:
-          case GEN_OCL_READ_IMAGE_I_I:
-          case GEN_OCL_READ_IMAGE_UI_I:
-          case GEN_OCL_READ_IMAGE_F_I:
           case GEN_OCL_READ_IMAGE_I_3D_I:
           case GEN_OCL_READ_IMAGE_UI_3D_I:
           case GEN_OCL_READ_IMAGE_F_3D_I:
+            image_dim = 3;
+handle_read_image:
           {
             GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
             const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
@@ -2523,20 +2547,26 @@ namespace gbe
             const uint8_t sampler = this->appendSampler(AI);
             ++AI;
 
-            GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
-            GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+            ir::Register ucoord;
+            ir::Register vcoord;
             ir::Register wcoord;
-            bool is3D = false;
-            if (it->second == GEN_OCL_READ_IMAGE_I_3D    ||
-                it->second == GEN_OCL_READ_IMAGE_UI_3D   ||
-                it->second == GEN_OCL_READ_IMAGE_F_3D    ||
-                it->second == GEN_OCL_READ_IMAGE_I_3D_I  ||
-                it->second == GEN_OCL_READ_IMAGE_UI_3D_I ||
-                it->second == GEN_OCL_READ_IMAGE_F_3D_I) {
-              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
-              is3D = true;
-            } else
-              wcoord = ucoord; // not used, just a padding.
+
+            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+            if (image_dim > 1) {
+              GBE_ASSERT(AI != AE);
+              vcoord = this->getRegister(*AI);
+              ++AI;
+            } else {
+              vcoord = ir::InvalidRegister;
+            }
+
+            if (image_dim > 2) {
+              GBE_ASSERT(AI != AE);
+              wcoord = this->getRegister(*AI);
+              ++AI;
+            } else {
+              wcoord = ir::InvalidRegister;
+            }
 
             vector<ir::Register> dstTupleData, srcTupleData;
             const uint32_t elemNum = 4;
@@ -2561,19 +2591,25 @@ namespace gbe
             ir::Type dstType = ir::TYPE_U32;
 
             switch(it->second) {
-              case GEN_OCL_READ_IMAGE_I:
-              case GEN_OCL_READ_IMAGE_UI:
+              case GEN_OCL_READ_IMAGE_I_1D:
+              case GEN_OCL_READ_IMAGE_UI_1D:
+              case GEN_OCL_READ_IMAGE_I_2D:
+              case GEN_OCL_READ_IMAGE_UI_2D:
               case GEN_OCL_READ_IMAGE_I_3D:
               case GEN_OCL_READ_IMAGE_UI_3D:
-              case GEN_OCL_READ_IMAGE_I_I:
-              case GEN_OCL_READ_IMAGE_UI_I:
+              case GEN_OCL_READ_IMAGE_I_1D_I:
+              case GEN_OCL_READ_IMAGE_UI_1D_I:
+              case GEN_OCL_READ_IMAGE_I_2D_I:
+              case GEN_OCL_READ_IMAGE_UI_2D_I:
               case GEN_OCL_READ_IMAGE_I_3D_I:
               case GEN_OCL_READ_IMAGE_UI_3D_I:
                 dstType = ir::TYPE_U32;
                 break;
-              case GEN_OCL_READ_IMAGE_F:
+              case GEN_OCL_READ_IMAGE_F_1D:
+              case GEN_OCL_READ_IMAGE_F_2D:
               case GEN_OCL_READ_IMAGE_F_3D:
-              case GEN_OCL_READ_IMAGE_F_I:
+              case GEN_OCL_READ_IMAGE_F_1D_I:
+              case GEN_OCL_READ_IMAGE_F_2D_I:
               case GEN_OCL_READ_IMAGE_F_3D_I:
                 dstType = ir::TYPE_FLOAT;
                 break;
@@ -2584,27 +2620,48 @@ namespace gbe
             bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
 
             ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
-                       isFloatCoord, sampler, samplerOffset, is3D);
+                       isFloatCoord, sampler, samplerOffset);
             break;
           }
-          case GEN_OCL_WRITE_IMAGE_I:
-          case GEN_OCL_WRITE_IMAGE_UI:
-          case GEN_OCL_WRITE_IMAGE_F:
+
+          case GEN_OCL_WRITE_IMAGE_I_1D:
+          case GEN_OCL_WRITE_IMAGE_UI_1D:
+          case GEN_OCL_WRITE_IMAGE_F_1D:
+            image_dim = 1;
+            goto handle_write_image;
+          case GEN_OCL_WRITE_IMAGE_I_2D:
+          case GEN_OCL_WRITE_IMAGE_UI_2D:
+          case GEN_OCL_WRITE_IMAGE_F_2D:
+            image_dim = 2;
+            goto handle_write_image;
           case GEN_OCL_WRITE_IMAGE_I_3D:
           case GEN_OCL_WRITE_IMAGE_UI_3D:
           case GEN_OCL_WRITE_IMAGE_F_3D:
+            image_dim = 3;
+handle_write_image:
           {
             GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
             const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
-            GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
-            GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
-            ir::Register wcoord;
-            bool is3D = false;
-            if(it->second >= GEN_OCL_WRITE_IMAGE_I_3D) {
-              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
-              is3D = true;
-            } else
-              wcoord = ucoord; // not used, just padding.
+            ir::Register ucoord, vcoord, wcoord;
+
+            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+
+            if (image_dim > 1) {
+              GBE_ASSERT(AI != AE);
+              vcoord = this->getRegister(*AI);
+              ++AI;
+            } else {
+              vcoord = ir::InvalidRegister;
+            }
+
+            if (image_dim > 2) {
+              GBE_ASSERT(AI != AE);
+              wcoord = this->getRegister(*AI);
+              ++AI;
+            } else {
+              wcoord = ir::InvalidRegister;
+            }
+
             GBE_ASSERT(AI != AE);
             vector<ir::Register> srcTupleData;
 
@@ -2622,13 +2679,16 @@ namespace gbe
             ir::Type srcType = ir::TYPE_U32;
 
             switch(it->second) {
-              case GEN_OCL_WRITE_IMAGE_I:
-              case GEN_OCL_WRITE_IMAGE_UI:
+              case GEN_OCL_WRITE_IMAGE_I_1D:
+              case GEN_OCL_WRITE_IMAGE_UI_1D:
+              case GEN_OCL_WRITE_IMAGE_I_2D:
+              case GEN_OCL_WRITE_IMAGE_UI_2D:
               case GEN_OCL_WRITE_IMAGE_I_3D:
               case GEN_OCL_WRITE_IMAGE_UI_3D:
                 srcType = ir::TYPE_U32;
                 break;
-              case GEN_OCL_WRITE_IMAGE_F:
+              case GEN_OCL_WRITE_IMAGE_F_1D:
+              case GEN_OCL_WRITE_IMAGE_F_2D:
               case GEN_OCL_WRITE_IMAGE_F_3D:
                 srcType = ir::TYPE_FLOAT;
                 break;
@@ -2636,7 +2696,7 @@ namespace gbe
                 GBE_ASSERT(0); // never been here.
             }
 
-            ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32, is3D);
+            ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
             break;
           }
           case GEN_OCL_MUL_HI_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 4236298..e6f25b3 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -46,24 +46,34 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8,  __gen_ocl_force_simd8)
 DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
 
 // To read_image functions.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, _Z21__gen_ocl_read_imagefjtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
 // work around read image with the LD message. The coords are integer type.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_I, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_I, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_I, _Z21__gen_ocl_read_imagefjtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij)
 
 // To write_image functions.
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, _Z22__gen_ocl_write_imagefjiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
+
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f)
 
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 73817e2..f1513f8 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -639,19 +639,26 @@ namespace gbe {
 
         // Get the function arguments
         CallSite CS(call);
-        CallSite::arg_iterator CI = CS.arg_begin() + 3;
+        CallSite::arg_iterator CI = CS.arg_begin() + 2;
 
         switch (it->second) {
           default: break;
-          case GEN_OCL_READ_IMAGE_I:
-          case GEN_OCL_READ_IMAGE_UI:
-          case GEN_OCL_READ_IMAGE_F:
+          case GEN_OCL_READ_IMAGE_I_1D:
+          case GEN_OCL_READ_IMAGE_UI_1D:
+          case GEN_OCL_READ_IMAGE_F_1D:
+          case GEN_OCL_READ_IMAGE_I_2D:
+          case GEN_OCL_READ_IMAGE_UI_2D:
+          case GEN_OCL_READ_IMAGE_F_2D:
           case GEN_OCL_READ_IMAGE_I_3D:
           case GEN_OCL_READ_IMAGE_UI_3D:
           case GEN_OCL_READ_IMAGE_F_3D:
-          case GEN_OCL_READ_IMAGE_I_I:
-          case GEN_OCL_READ_IMAGE_UI_I:
-          case GEN_OCL_READ_IMAGE_F_I:
+
+	  case GEN_OCL_READ_IMAGE_I_1D_I:
+          case GEN_OCL_READ_IMAGE_UI_1D_I:
+          case GEN_OCL_READ_IMAGE_F_1D_I:
+          case GEN_OCL_READ_IMAGE_I_2D_I:
+          case GEN_OCL_READ_IMAGE_UI_2D_I:
+          case GEN_OCL_READ_IMAGE_F_2D_I:
           case GEN_OCL_READ_IMAGE_I_3D_I:
           case GEN_OCL_READ_IMAGE_UI_3D_I:
           case GEN_OCL_READ_IMAGE_F_3D_I:
@@ -666,9 +673,13 @@ namespace gbe {
           case GEN_OCL_WRITE_IMAGE_UI_3D:
           case GEN_OCL_WRITE_IMAGE_F_3D:
             CI++;
-          case GEN_OCL_WRITE_IMAGE_I:
-          case GEN_OCL_WRITE_IMAGE_UI:
-          case GEN_OCL_WRITE_IMAGE_F:
+          case GEN_OCL_WRITE_IMAGE_I_2D:
+          case GEN_OCL_WRITE_IMAGE_UI_2D:
+          case GEN_OCL_WRITE_IMAGE_F_2D:
+            CI++;
+          case GEN_OCL_WRITE_IMAGE_I_1D:
+          case GEN_OCL_WRITE_IMAGE_UI_1D:
+          case GEN_OCL_WRITE_IMAGE_F_1D:
           {
             *CI = InsertToVector(call, *CI);
             break;
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index cd8b918..8ab8b31 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -83,12 +83,15 @@ DEF(double);
 // This is a transitional hack to bypass the LLVM 3.3 built-in types.
 // See the Khronos SPIR specification for handling of these types.
 #define __texture __attribute__((address_space(4)))
+struct _image1d_t;
+typedef __texture struct _image1d_t* __image1d_t;
 struct _image2d_t;
 typedef __texture struct _image2d_t* __image2d_t;
 struct _image3d_t;
 typedef __texture struct _image3d_t* __image3d_t;
 typedef const ushort __sampler_t;
 typedef size_t __event_t;
+#define image1d_t __image1d_t
 #define image2d_t __image2d_t
 #define image3d_t __image3d_t
 #define sampler_t __sampler_t
@@ -4545,6 +4548,15 @@ int __gen_ocl_force_simd16(void);
 // Image access functions
 /////////////////////////////////////////////////////////////////////////////
 
+// 1D read
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+
+// 2D read
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
@@ -4552,6 +4564,7 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, in
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
 
+// 3D read
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
@@ -4559,28 +4572,37 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, in
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
 
+// 1D write
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
+
+// 2D write
 OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
 OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
 OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
 
+// 3D write
 OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
 OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
 OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+
 int __gen_ocl_get_image_width(uint surface_id);
 int __gen_ocl_get_image_height(uint surface_id);
 int __gen_ocl_get_image_channel_data_type(uint surface_id);
 int __gen_ocl_get_image_channel_order(uint surface_id);
 int __gen_ocl_get_image_depth(uint surface_id);
 
-#define GET_IMAGE(cl_image, surface_id) \
-    uint surface_id = (uint)cl_image
-
+// 1D 2D 3D Image Common Macro
 #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
 #define GEN_FIX_1 1
 #else
 #define GEN_FIX_1 0
 #endif
 
+#define GET_IMAGE(cl_image, surface_id) \
+    uint surface_id = (uint)cl_image
+
 #define DECL_READ_IMAGE0(int_clamping_fix,          \
                         image_type, type, suffix, coord_type, n)             \
   INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
@@ -4646,6 +4668,52 @@ int __gen_ocl_get_image_depth(uint surface_id);
     __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
   }
 
+
+// 1D
+#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix)                       \
+  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1)               \
+  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1)  \
+  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1)                        \
+  DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
+  DECL_WRITE_IMAGE(image_type, type, suffix, float)
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
+#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
+
+#define OUT_OF_BOX(coord, surface, normalized)                   \
+  (coord < 0 ||                                                  \
+   ((normalized == 0)                                            \
+     && (coord >= __gen_ocl_get_image_width(surface)))           \
+   || ((normalized != 0) && (coord > 0x1p0)))
+
+#define FIXUP_FLOAT_COORD(tmpCoord)                            \
+  { /* push tiny negative coords (> -2^-20) further below 0 */ \
+    if (tmpCoord < 0 && tmpCoord > -0x1p-20f)                  \
+      tmpCoord += -0x1p-9f;                                    \
+  }
+
+DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
+DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
+DECL_IMAGE(0, image1d_t, float4, f)
+
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORD1
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef OUT_OF_BOX
+#undef FIXUP_FLOAT_COORD
+#undef DECL_IMAGE
+// End of 1D
+
+#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n)                       \
+  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n)              \
+  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
+  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n)                       \
+  DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
+  DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
+// 2D
 #define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
 #define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
                                                (int)(coord.s1 < 0 ? -1 : coord.s1)
@@ -4668,13 +4736,6 @@ int __gen_ocl_get_image_depth(uint surface_id);
       tmpCoord.s1 += -0x1p-9f;                                 \
   }
 
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n)                       \
-  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n)              \
-  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
-  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n)                       \
-  DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
-  DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
-
 DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
 DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
 DECL_IMAGE(0, image2d_t, float4, f, 2)
@@ -4686,6 +4747,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2)
 #undef OUT_OF_BOX
 #undef FIXUP_FLOAT_COORD
 
+// 3D
 #define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
 #define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
                                                (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
-- 
1.8.3.2



More information about the Beignet mailing list