[Beignet] [PATCH 04/18] GBE: fix the wrong usage of stack pointer and stack buffer.

Zhigang Gong zhigang.gong at intel.com
Fri Mar 28 00:10:42 PDT 2014


The stack pointer and the stack buffer should be two different virtual
registers: the former is a vector and the latter is a scalar. The
previous implementation only worked because it looked up the curbe
offset and built a new stack buffer register manually, which is not
good. Fix that now and remove the hack. We don't actually need to use
the curbe offset manually after the allocation.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/context.cpp     |  2 +-
 backend/src/backend/gen_context.cpp |  7 +++----
 backend/src/ir/profile.cpp          |  3 ++-
 backend/src/ir/profile.hpp          | 19 ++++++++++---------
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b0402b9..b8f4171 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -538,7 +538,7 @@ namespace gbe
 
     // Insert the stack buffer if used
     if (useStackPtr)
-      insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+      insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
 
     // After this point the vector is immutable. Sorting it will make
     // research faster
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index f6848b2..8bcf454 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -147,7 +147,7 @@ namespace gbe
     // Check that everything is consistent in the kernel code
     const uint32_t perLaneSize = kernel->getStackSize();
     const uint32_t perThreadSize = perLaneSize * this->simdWidth;
-    const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+    //const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
     GBE_ASSERT(perLaneSize > 0);
     GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
     GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
@@ -159,9 +159,8 @@ namespace gbe
       GenRegister::ud8grf(ir::ocl::stackptr) :
       GenRegister::ud16grf(ir::ocl::stackptr);
     const GenRegister stackptr = ra->genReg(selStatckPtr);
-    const uint32_t nr = offset / GEN_REG_SIZE;
-    const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
-    const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);
+    const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer);
+    const GenRegister bufferptr = ra->genReg(selStackBuffer);
 
     // We compute the per-lane stack pointer here
     p->push();
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 0a64d81..f91e5d4 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -37,7 +37,7 @@ namespace ir {
         "local_size_0", "local_size_1", "local_size_2",
         "global_size_0", "global_size_1", "global_size_2",
         "global_offset_0", "global_offset_1", "global_offset_2",
-        "stack_pointer",
+        "stack_pointer", "stack_buffer",
         "block_ip",
         "barrier_id", "thread_number",
         "work_dimension",
@@ -73,6 +73,7 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, goffset1, 1);
       DECL_NEW_REG(FAMILY_DWORD, goffset2, 1);
       DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+      DECL_NEW_REG(FAMILY_DWORD, stackbuffer, 1);
       DECL_NEW_REG(FAMILY_WORD,  blockip, 0);
       DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
       DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index cda5edf..cc19fcb 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -60,15 +60,16 @@ namespace ir {
     static const Register goffset1 = Register(16); // get_global_offset(1)
     static const Register goffset2 = Register(17); // get_global_offset(2)
     static const Register stackptr = Register(18); // stack pointer
-    static const Register blockip = Register(19);  // blockip
-    static const Register barrierid = Register(20);// barrierid
-    static const Register threadn = Register(21);  // number of threads
-    static const Register workdim = Register(22);  // work dimention.
-    static const Register emask = Register(23);    // store the emask bits for the branching fix.
-    static const Register notemask = Register(24); // store the !emask bits for the branching fix.
-    static const Register barriermask = Register(25); // software mask for barrier.
-    static const Register retVal = Register(26);   // helper register to do data flow analysis.
-    static const uint32_t regNum = 27;             // number of special registers
+    static const Register stackbuffer = Register(19); // stack buffer base address.
+    static const Register blockip = Register(20);  // blockip
+    static const Register barrierid = Register(21);// barrierid
+    static const Register threadn = Register(22);  // number of threads
+    static const Register workdim = Register(23);  // work dimention.
+    static const Register emask = Register(24);    // store the emask bits for the branching fix.
+    static const Register notemask = Register(25); // store the !emask bits for the branching fix.
+    static const Register barriermask = Register(26); // software mask for barrier.
+    static const Register retVal = Register(27);   // helper register to do data flow analysis.
+    static const uint32_t regNum = 28;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
-- 
1.8.3.2



More information about the Beignet mailing list