[Beignet] [PATCH 3/5 version2] add third coord in backend

Homer Hsing homer.xing at intel.com
Wed May 8 01:05:16 PDT 2013


Signed-off-by: Homer Hsing <homer.xing at intel.com>
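---
Summary: the image read and image write IR instructions gain a third
coordinate source "w" (srcNum 4 -> 5 and 7 -> 8). The LLVM backend
accepts GEN_OCL_READ_IMAGE10..15 and GEN_OCL_WRITE_IMAGE10..15 and passes
ir::Register(0) as w when no third coordinate is read. The Gen backend
copies w into the message payload only when that register is not 0.

Review note: GEN_OCL_READ_IMAGE15 and GEN_OCL_WRITE_IMAGE15 are listed in
the case labels but not in the conditions that fetch the w argument, so
they take the ir::Register(0) branch. If those variants also pass a w
coordinate, the WRITE_IMAGE15 path would read the data elements from the
w argument. Please confirm whether the omission is intentional.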
 backend/src/backend/gen_context.cpp   | 14 +++++---
 backend/src/ir/instruction.cpp        | 12 ++++---
 backend/src/llvm/llvm_gen_backend.cpp | 60 +++++++++++++++++++++++++++++++++--
 3 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 1f867b8..4a16cae 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -291,6 +291,7 @@ namespace gbe
     const GenRegister sampler = ra->genReg(insn.src(5));
     const GenRegister ucoord = ra->genReg(insn.src(6));
     const GenRegister vcoord = ra->genReg(insn.src(7));
+    const GenRegister wcoord = ra->genReg(insn.src(8));
     const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
     const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
     uint32_t simdWidth = p->curr.execWidth;
@@ -309,6 +310,8 @@ namespace gbe
     /* Prepare message payload. */
     p->MOV(GenRegister::f8grf(nr , 0), ucoord);
     p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
+    if (insn.src(8).reg() != 0)
+      p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
     p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
 
     p->pop();
@@ -319,10 +322,11 @@ namespace gbe
     const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
     const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
     const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
-    const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
-    const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
-    const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
-    const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
+    const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
+    const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
+    const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
+    const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
+    const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
     const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
 
     p->push();
@@ -357,6 +361,8 @@ namespace gbe
                                         GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type))
       QUARTER_MOV0(nr + 1, ucoord);
       QUARTER_MOV0(nr + 2, vcoord);
+      if (insn.src(3 + insn.extra.elem).reg() != 0)
+        QUARTER_MOV0(nr + 3, wcoord);
       QUARTER_MOV1(nr + 5, R);
       QUARTER_MOV1(nr + 6, G);
       QUARTER_MOV1(nr + 7, B);
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 9fd4247..8980abf 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -414,6 +414,7 @@ namespace ir {
             << " sampler %" << this->getSrc(fn, 1)
             << " coord u %" << this->getSrc(fn, 2)
             << " coord v %" << this->getSrc(fn, 3)
+            << " coord w %" << this->getSrc(fn, 4)
             << " %" << this->getDst(fn, 0)
             << " %" << this->getDst(fn, 1)
             << " %" << this->getDst(fn, 2)
@@ -427,7 +428,7 @@ namespace ir {
       INLINE Type getSrcType(void) const { return this->srcType; }
       INLINE Type getDstType(void) const { return this->dstType; }
 
-      static const uint32_t srcNum = 4;
+      static const uint32_t srcNum = 5;
       static const uint32_t dstNum = 4;
     };
 
@@ -451,10 +452,11 @@ namespace ir {
             << " surface id %" << this->getSrc(fn, 0)
             << " coord u %" << this->getSrc(fn, 1)
             << " coord v %" << this->getSrc(fn, 2)
-            << " %" << this->getSrc(fn, 3)
+            << " coord w %" << this->getSrc(fn, 3)
             << " %" << this->getSrc(fn, 4)
             << " %" << this->getSrc(fn, 5)
-            << " %" << this->getSrc(fn, 6);
+            << " %" << this->getSrc(fn, 6)
+            << " %" << this->getSrc(fn, 7);
       }
 
       Tuple src;
@@ -463,8 +465,8 @@ namespace ir {
 
       INLINE Type getSrcType(void) const { return this->srcType; }
       INLINE Type getCoordType(void) const { return this->coordType; }
-      // bti, u, v, 4 data elements
-      static const uint32_t srcNum = 7;
+      // bti, u, v, w, 4 data elements
+      static const uint32_t srcNum = 8;
       Register dst[0];               //!< No dest register
     };
 
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index f04a6ba..ad465e2 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1828,6 +1828,12 @@ namespace gbe
       case GEN_OCL_WRITE_IMAGE3:
       case GEN_OCL_WRITE_IMAGE4:
       case GEN_OCL_WRITE_IMAGE5:
+      case GEN_OCL_WRITE_IMAGE10:
+      case GEN_OCL_WRITE_IMAGE11:
+      case GEN_OCL_WRITE_IMAGE12:
+      case GEN_OCL_WRITE_IMAGE13:
+      case GEN_OCL_WRITE_IMAGE14:
+      case GEN_OCL_WRITE_IMAGE15:
         break;
       case GEN_OCL_READ_IMAGE0:
       case GEN_OCL_READ_IMAGE1:
@@ -1835,6 +1841,12 @@ namespace gbe
       case GEN_OCL_READ_IMAGE3:
       case GEN_OCL_READ_IMAGE4:
       case GEN_OCL_READ_IMAGE5:
+      case GEN_OCL_READ_IMAGE10:
+      case GEN_OCL_READ_IMAGE11:
+      case GEN_OCL_READ_IMAGE12:
+      case GEN_OCL_READ_IMAGE13:
+      case GEN_OCL_READ_IMAGE14:
+      case GEN_OCL_READ_IMAGE15:
       {
       // dst is a 4 elements vector. We allocate all 4 registers here.
         uint32_t elemNum;
@@ -1970,11 +1982,26 @@ namespace gbe
           case GEN_OCL_READ_IMAGE3:
           case GEN_OCL_READ_IMAGE4:
           case GEN_OCL_READ_IMAGE5:
+          case GEN_OCL_READ_IMAGE10:
+          case GEN_OCL_READ_IMAGE11:
+          case GEN_OCL_READ_IMAGE12:
+          case GEN_OCL_READ_IMAGE13:
+          case GEN_OCL_READ_IMAGE14:
+          case GEN_OCL_READ_IMAGE15:
           {
             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+            ir::Register wcoord;
+            if (it->second == GEN_OCL_READ_IMAGE10 ||
+                it->second == GEN_OCL_READ_IMAGE11 ||
+                it->second == GEN_OCL_READ_IMAGE12 ||
+                it->second == GEN_OCL_READ_IMAGE13 ||
+                it->second == GEN_OCL_READ_IMAGE14) {
+              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+            } else
+              wcoord = ir::Register(0);
 
             vector<ir::Register> dstTupleData, srcTupleData;
             const uint32_t elemNum = 4;
@@ -1986,26 +2013,33 @@ namespace gbe
             srcTupleData.push_back(sampler);
             srcTupleData.push_back(ucoord);
             srcTupleData.push_back(vcoord);
+            srcTupleData.push_back(wcoord);
             const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
-            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 4);
+            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5);
 
             ir::Type srcType = ir::TYPE_U32, dstType = ir::TYPE_U32;
 
             switch(it->second) {
               case GEN_OCL_READ_IMAGE0:
               case GEN_OCL_READ_IMAGE2:
+              case GEN_OCL_READ_IMAGE10:
+              case GEN_OCL_READ_IMAGE12:
                 srcType = dstType = ir::TYPE_U32;
                 break;
               case GEN_OCL_READ_IMAGE1:
               case GEN_OCL_READ_IMAGE3:
+              case GEN_OCL_READ_IMAGE11:
+              case GEN_OCL_READ_IMAGE13:
                 dstType = ir::TYPE_U32;
                 srcType = ir::TYPE_FLOAT;
                 break;
               case GEN_OCL_READ_IMAGE4:
+              case GEN_OCL_READ_IMAGE14:
                 dstType = ir::TYPE_FLOAT;
                 srcType = ir::TYPE_U32;
                 break;
               case GEN_OCL_READ_IMAGE5:
+              case GEN_OCL_READ_IMAGE15:
                 srcType = dstType = ir::TYPE_FLOAT;
                 break;
               default:
@@ -2021,41 +2055,63 @@ namespace gbe
           case GEN_OCL_WRITE_IMAGE3:
           case GEN_OCL_WRITE_IMAGE4:
           case GEN_OCL_WRITE_IMAGE5:
+          case GEN_OCL_WRITE_IMAGE10:
+          case GEN_OCL_WRITE_IMAGE11:
+          case GEN_OCL_WRITE_IMAGE12:
+          case GEN_OCL_WRITE_IMAGE13:
+          case GEN_OCL_WRITE_IMAGE14:
+          case GEN_OCL_WRITE_IMAGE15:
           {
             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+            ir::Register wcoord;
+            if(it->second == GEN_OCL_WRITE_IMAGE10 ||
+               it->second == GEN_OCL_WRITE_IMAGE11 ||
+               it->second == GEN_OCL_WRITE_IMAGE12 ||
+               it->second == GEN_OCL_WRITE_IMAGE13 ||
+               it->second == GEN_OCL_WRITE_IMAGE14) {
+              GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
+            } else
+              wcoord = ir::Register(0);
             GBE_ASSERT(AI != AE);
             vector<ir::Register> srcTupleData;
 
             srcTupleData.push_back(surface_id);
             srcTupleData.push_back(ucoord);
             srcTupleData.push_back(vcoord);
+            srcTupleData.push_back(wcoord);
 
             const uint32_t elemNum = 4;
             for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
               const ir::Register reg = this->getRegister(*AI, elemID);
               srcTupleData.push_back(reg);
             }
-            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
+            const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8);
 
             ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
 
             switch(it->second) {
               case GEN_OCL_WRITE_IMAGE0:
               case GEN_OCL_WRITE_IMAGE2:
+              case GEN_OCL_WRITE_IMAGE10:
+              case GEN_OCL_WRITE_IMAGE12:
                 srcType = coordType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE1:
               case GEN_OCL_WRITE_IMAGE3:
+              case GEN_OCL_WRITE_IMAGE11:
+              case GEN_OCL_WRITE_IMAGE13:
                 coordType = ir::TYPE_FLOAT;
                 srcType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE4:
+              case GEN_OCL_WRITE_IMAGE14:
                 srcType = ir::TYPE_FLOAT;
                 coordType = ir::TYPE_U32;
                 break;
               case GEN_OCL_WRITE_IMAGE5:
+              case GEN_OCL_WRITE_IMAGE15:
                 srcType = coordType = ir::TYPE_FLOAT;
                 break;
               default:
-- 
1.8.1.2



More information about the Beignet mailing list