[Beignet] [PATCH 4/4] Fix some math function error in simd16.
Yang Rong
rong.r.yang at intel.com
Thu Jun 27 01:47:58 PDT 2013
In SIMD16, INT DIV is split into two SIMD8 instructions, but the encoder forgot to set quarter_control on them.
This fails when predication is enabled.
Change the atomic test case to use an integer division (`lid / 4` instead of `lid>>2`) so that it triggers this bug.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen_encoder.cpp | 2 ++
kernels/compiler_atomic_functions.cl | 8 ++++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index b843c43..1062643 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1046,6 +1046,7 @@ namespace gbe
if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER) {
insn->header.execution_size = GEN_WIDTH_8;
+ insn->header.quarter_control = GEN_COMPRESSION_Q1;
if(this->curr.execWidth == 16) {
GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
@@ -1056,6 +1057,7 @@ namespace gbe
insn2->header.destreg_or_condmod = function;
this->setHeader(insn2);
insn2->header.execution_size = GEN_WIDTH_8;
+ insn2->header.quarter_control = GEN_COMPRESSION_Q2;
this->setDst(insn2, new_dest);
this->setSrc0(insn2, new_src0);
this->setSrc1(insn2, new_src1);
diff --git a/kernels/compiler_atomic_functions.cl b/kernels/compiler_atomic_functions.cl
index 24f17c2..61ce2f4 100644
--- a/kernels/compiler_atomic_functions.cl
+++ b/kernels/compiler_atomic_functions.cl
@@ -7,8 +7,8 @@ __kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __g
case 1: atomic_dec(&tmp[i]); break;
case 2: atomic_add(&tmp[i], src[lid]); break;
case 3: atomic_sub(&tmp[i], src[lid]); break;
- case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid>>2))); break;
- case 5: atomic_or (&tmp[i], src[lid]<<(lid>>2)); break;
+ case 4: atomic_and(&tmp[i], ~(src[lid]<<(lid / 4))); break;
+ case 5: atomic_or (&tmp[i], src[lid]<<(lid / 4)); break;
case 6: atomic_xor(&tmp[i], src[lid]); break;
case 7: atomic_min(&tmp[i], -src[lid]); break;
case 8: atomic_max(&tmp[i], src[lid]); break;
@@ -23,8 +23,8 @@ __kernel void compiler_atomic_functions(__global int *dst, __local int *tmp, __g
case 1: atomic_dec(&dst[i]); break;
case 2: atomic_add(&dst[i], src[lid]); break;
case 3: atomic_sub(&dst[i], src[lid]); break;
- case 4: atomic_and(&dst[i], ~(src[lid]<<(lid>>2))); break;
- case 5: atomic_or (&dst[i], src[lid]<<(lid>>2)); break;
+ case 4: atomic_and(&dst[i], ~(src[lid]<<(lid / 4))); break;
+ case 5: atomic_or (&dst[i], src[lid]<<(lid / 4)); break;
case 6: atomic_xor(&dst[i], src[lid]); break;
case 7: atomic_min(&dst[i], -src[lid]); break;
case 8: atomic_max(&dst[i], src[lid]); break;
--
1.7.10.4
More information about the Beignet
mailing list