[Beignet] [PATCH] Dst of 3-op math functions needs 16-byte alignment when allocating registers.
xionghu.luo at intel.com
xionghu.luo at intel.com
Thu May 19 11:44:05 UTC 2016
From: Luo Xionghu <xionghu.luo at intel.com>
A GPU hang occurs for a uniform mad instruction when its destination
register is not 16-byte aligned. Check for this case and round the mad
dst alignment up to 16 bytes before register allocation.
This patch should fix
"STRICT=0, opencv_test_video/OCL_Video/PyrLKOpticalFlow."
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
backend/src/backend/gen_reg_allocation.cpp | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index da3dac0..1aaac58 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -49,10 +49,11 @@ namespace gbe
*/
struct GenRegInterval {
INLINE GenRegInterval(ir::Register reg) :
- reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0) {}
+ reg(reg), minID(INT_MAX), maxID(-INT_MAX), conflictReg(0), b3OpAlign(0) {}
ir::Register reg; //!< (virtual) register of the interval
int32_t minID, maxID; //!< Starting and ending points
ir::Register conflictReg; // < has banck conflict with this register
+ bool b3OpAlign;
};
typedef struct GenRegIntervalKey {
@@ -1050,6 +1051,9 @@ namespace gbe
}
}
}
+ if (interval.b3OpAlign != 0) {
+ alignment = (alignment + 15) & ~15;
+ }
while ((grfOffset = ctx.allocate(size, alignment, direction)) == -1) {
const bool success = this->expireGRF(interval);
if (success == false) {
@@ -1138,6 +1142,9 @@ namespace gbe
reg == ir::ocl::groupid1 ||
reg == ir::ocl::groupid2)
continue;
+ if (is3SrcOp) {
+ this->intervals[reg].b3OpAlign = 1;
+ }
this->intervals[reg].minID = std::min(this->intervals[reg].minID, insnID);
this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, insnID);
}
--
2.1.4
More information about the Beignet
mailing list