[Beignet] [PATCH 5/5] BDW: Correct stack setting in BDW.
Yang Rong
rong.r.yang at intel.com
Wed Oct 8 23:07:56 PDT 2014
Remove the special HSW fftid handling in Gen8Context, and change the stack buffer
address in the curbe to a QWORD. Because this only wastes 4 bytes of register space on
other platforms, change it to a QWORD for all platforms.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen8_context.cpp | 44 ------------------------------------
backend/src/backend/gen8_context.hpp | 2 --
backend/src/ir/profile.cpp | 6 ++---
3 files changed, 3 insertions(+), 49 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index f7484ca..cd62931 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -51,50 +51,6 @@ namespace gbe
return i;
}
- void Gen8Context::emitStackPointer(void) {
- using namespace ir;
-
- // Only emit stack pointer computation if we use a stack
- if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
- return;
-
- // Check that everything is consistent in the kernel code
- const uint32_t perLaneSize = kernel->getStackSize();
- const uint32_t perThreadSize = perLaneSize * this->simdWidth;
- GBE_ASSERT(perLaneSize > 0);
- GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
- GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
-
- // Use shifts rather than muls which are limited to 32x16 bit sources
- const uint32_t perLaneShift = logi2(perLaneSize);
- const uint32_t perThreadShift = logi2(perThreadSize);
- const GenRegister selStatckPtr = this->simdWidth == 8 ?
- GenRegister::ud8grf(ir::ocl::stackptr) :
- GenRegister::ud16grf(ir::ocl::stackptr);
- const GenRegister stackptr = ra->genReg(selStatckPtr);
- const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer);
- const GenRegister bufferptr = ra->genReg(selStackBuffer);
-
- // We compute the per-lane stack pointer here
- p->push();
- p->curr.execWidth = 1;
- p->curr.predicate = GEN_PREDICATE_NONE;
- //p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff));
- p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x7f));
- p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), GenRegister::immud(0x180));
- p->SHR(GenRegister::ud1grf(126,4), GenRegister::ud1grf(126, 4), GenRegister::immud(7));
- p->curr.execWidth = this->simdWidth;
- p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
- p->curr.execWidth = 1;
- p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(2));
- p->ADD(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::ud1grf(126, 4));
- p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
- p->curr.execWidth = this->simdWidth;
- p->ADD(stackptr, stackptr, bufferptr);
- p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0));
- p->pop();
- }
-
void Gen8Context::newSelection(void) {
this->sel = GBE_NEW(Selection8, *this);
}
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 684a366..1d5b1d7
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -38,8 +38,6 @@ namespace gbe
};
/*! device's max srcatch buffer size */
#define GEN8_SCRATCH_SIZE (2 * KB * KB)
- /*! Emit the per-lane stack pointer computation */
- virtual void emitStackPointer(void);
/*! Align the scratch size to the device's scratch unit size */
virtual uint32_t alignScratchSize(uint32_t size);
/*! Get the device's max srcatch size */
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index fc69367..57cce4b
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -48,11 +48,11 @@ namespace ir {
#if GBE_DEBUG
#define DECL_NEW_REG(FAMILY, REG, UNIFORM) \
- r = fn.newRegister(FAMILY_DWORD, UNIFORM); \
+ r = fn.newRegister(FAMILY, UNIFORM); \
GBE_ASSERT(r == REG);
#else
#define DECL_NEW_REG(FAMILY, REG, UNIFORM) \
- fn.newRegister(FAMILY_DWORD, UNIFORM);
+ fn.newRegister(FAMILY, UNIFORM);
#endif /* GBE_DEBUG */
static void init(Function &fn) {
IF_DEBUG(Register r);
@@ -75,7 +75,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, goffset1, 1);
DECL_NEW_REG(FAMILY_DWORD, goffset2, 1);
DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
- DECL_NEW_REG(FAMILY_DWORD, stackbuffer, 1);
+ DECL_NEW_REG(FAMILY_QWORD, stackbuffer, 1);
DECL_NEW_REG(FAMILY_WORD, blockip, 0);
DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
DECL_NEW_REG(FAMILY_DWORD, threadn, 1);
--
1.9.1
More information about the Beignet
mailing list