[Beignet] [PATCH 04/18] GBE: fix the wrong usage of stack pointer and stack buffer.
Zhigang Gong
zhigang.gong at intel.com
Fri Mar 28 00:10:42 PDT 2014
The stack pointer and the stack buffer should be two different virtual
registers: the former is a vector and the latter is a scalar. The previous
implementation only worked because it looked up the curbe offset and
manually built a new stack buffer register from it, which is not good.
Fix this and remove that hacky code. We do not actually need to use the
curbe offset manually after the allocation.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/context.cpp | 2 +-
backend/src/backend/gen_context.cpp | 7 +++----
backend/src/ir/profile.cpp | 3 ++-
backend/src/ir/profile.hpp | 19 ++++++++++---------
4 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b0402b9..b8f4171 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -538,7 +538,7 @@ namespace gbe
// Insert the stack buffer if used
if (useStackPtr)
- insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+ insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
// After this point the vector is immutable. Sorting it will make
// research faster
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index f6848b2..8bcf454 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -147,7 +147,7 @@ namespace gbe
// Check that everything is consistent in the kernel code
const uint32_t perLaneSize = kernel->getStackSize();
const uint32_t perThreadSize = perLaneSize * this->simdWidth;
- const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+ //const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
GBE_ASSERT(perLaneSize > 0);
GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
@@ -159,9 +159,8 @@ namespace gbe
GenRegister::ud8grf(ir::ocl::stackptr) :
GenRegister::ud16grf(ir::ocl::stackptr);
const GenRegister stackptr = ra->genReg(selStatckPtr);
- const uint32_t nr = offset / GEN_REG_SIZE;
- const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
- const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);
+ const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer);
+ const GenRegister bufferptr = ra->genReg(selStackBuffer);
// We compute the per-lane stack pointer here
p->push();
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 0a64d81..f91e5d4 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -37,7 +37,7 @@ namespace ir {
"local_size_0", "local_size_1", "local_size_2",
"global_size_0", "global_size_1", "global_size_2",
"global_offset_0", "global_offset_1", "global_offset_2",
- "stack_pointer",
+ "stack_pointer", "stack_buffer",
"block_ip",
"barrier_id", "thread_number",
"work_dimension",
@@ -73,6 +73,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, goffset1, 1);
DECL_NEW_REG(FAMILY_DWORD, goffset2, 1);
DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+ DECL_NEW_REG(FAMILY_DWORD, stackbuffer, 1);
DECL_NEW_REG(FAMILY_WORD, blockip, 0);
DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index cda5edf..cc19fcb 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -60,15 +60,16 @@ namespace ir {
static const Register goffset1 = Register(16); // get_global_offset(1)
static const Register goffset2 = Register(17); // get_global_offset(2)
static const Register stackptr = Register(18); // stack pointer
- static const Register blockip = Register(19); // blockip
- static const Register barrierid = Register(20);// barrierid
- static const Register threadn = Register(21); // number of threads
- static const Register workdim = Register(22); // work dimention.
- static const Register emask = Register(23); // store the emask bits for the branching fix.
- static const Register notemask = Register(24); // store the !emask bits for the branching fix.
- static const Register barriermask = Register(25); // software mask for barrier.
- static const Register retVal = Register(26); // helper register to do data flow analysis.
- static const uint32_t regNum = 27; // number of special registers
+ static const Register stackbuffer = Register(19); // stack buffer base address.
+ static const Register blockip = Register(20); // blockip
+ static const Register barrierid = Register(21);// barrierid
+ static const Register threadn = Register(22); // number of threads
+ static const Register workdim = Register(23); // work dimention.
+ static const Register emask = Register(24); // store the emask bits for the branching fix.
+ static const Register notemask = Register(25); // store the !emask bits for the branching fix.
+ static const Register barriermask = Register(26); // software mask for barrier.
+ static const Register retVal = Register(27); // helper register to do data flow analysis.
+ static const uint32_t regNum = 28; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
--
1.8.3.2
More information about the Beignet
mailing list