[Beignet] [PATCH] GBE: fix long work group failure.
Yang Rong
rong.r.yang at intel.com
Fri Jan 6 02:16:49 UTC 2017
For work group operations, long-type ops must take execWidth into account.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen8_context.cpp | 48 ++++++++++++++++++++++--------------
backend/src/backend/gen_context.cpp | 48 ++++++++++++++++++++++--------------
2 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 2bb8ad1..34baee8 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1854,10 +1854,12 @@ namespace gbe
{
GenRegister threadDataL = GenRegister::retype(threadData, GEN_TYPE_D);
GenRegister threadDataH = threadDataL.offset(threadDataL, 0, 4);
- p->MOV(msgData.offset(msgData, 0), threadDataL);
- p->MOV(msgData.offset(msgData, 1), threadDataH);
-
+ GenRegister msgDataL = GenRegister::retype(msgData, GEN_TYPE_D);
+ GenRegister msgDataH = msgDataL.offset(msgDataL, 1);
p->curr.execWidth = 8;
+ p->MOV(msgDataL, threadDataL);
+ p->MOV(msgDataH, threadDataH);
+
p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
p->ADD(msgAddr, msgAddr, msgSlmOff);
p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 2, insn.extra.wgop.splitSend);
@@ -1953,30 +1955,38 @@ namespace gbe
else if(wg_op == ir::WORKGROUP_OP_INCLUSIVE_MIN
|| wg_op == ir::WORKGROUP_OP_EXCLUSIVE_MIN)
{
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
/* workaround QW datatype on CMP */
if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L){
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 1, 0),
- dst.offset(dst, 1, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 2, 0),
- dst.offset(dst, 2, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 3, 0),
- dst.offset(dst, 3, 0), partialData);
- }
+ p->push();
+ p->curr.execWidth = 8;
+ p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
+ if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->SEL_CMP(GEN_CONDITIONAL_LE, GenRegister::Qn(dst, 1),
+ GenRegister::Qn(dst, 1), GenRegister::Qn(partialData, 1));
+ }
+ p->pop();
+ } else
+ p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
}
else if(wg_op == ir::WORKGROUP_OP_INCLUSIVE_MAX
|| wg_op == ir::WORKGROUP_OP_EXCLUSIVE_MAX)
{
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
/* workaround QW datatype on CMP */
if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L){
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 1, 0),
- dst.offset(dst, 1, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 2, 0),
- dst.offset(dst, 2, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 3, 0),
- dst.offset(dst, 3, 0), partialData);
- }
+ p->push();
+ p->curr.execWidth = 8;
+ p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
+ if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->SEL_CMP(GEN_CONDITIONAL_GE, GenRegister::Qn(dst, 1),
+ GenRegister::Qn(dst, 1), GenRegister::Qn(partialData, 1));
+ }
+ p->pop();
+ } else
+ p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
}
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 98a8944..0c1f82a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3309,10 +3309,12 @@ namespace gbe
{
GenRegister threadDataL = GenRegister::retype(threadData, GEN_TYPE_D);
GenRegister threadDataH = threadDataL.offset(threadDataL, 0, 4);
- p->MOV(msgData.offset(msgData, 0), threadDataL);
- p->MOV(msgData.offset(msgData, 1), threadDataH);
-
+ GenRegister msgDataL = GenRegister::retype(msgData, GEN_TYPE_D);
+ GenRegister msgDataH = msgDataL.offset(msgDataL, 1);
p->curr.execWidth = 8;
+ p->MOV(msgDataL, threadDataL);
+ p->MOV(msgDataH, threadDataH);
+
p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
p->ADD(msgAddr, msgAddr, msgSlmOff);
p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
@@ -3408,30 +3410,38 @@ namespace gbe
else if(wg_op == ir::WORKGROUP_OP_INCLUSIVE_MIN
|| wg_op == ir::WORKGROUP_OP_EXCLUSIVE_MIN)
{
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
/* workaround QW datatype on CMP */
if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L){
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 1, 0),
- dst.offset(dst, 1, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 2, 0),
- dst.offset(dst, 2, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_LE, dst.offset(dst, 3, 0),
- dst.offset(dst, 3, 0), partialData);
- }
+ p->push();
+ p->curr.execWidth = 8;
+ p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
+ if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->SEL_CMP(GEN_CONDITIONAL_LE, GenRegister::Qn(dst, 1),
+ GenRegister::Qn(dst, 1), GenRegister::Qn(partialData, 1));
+ }
+ p->pop();
+ } else
+ p->SEL_CMP(GEN_CONDITIONAL_LE, dst, dst, partialData);
}
else if(wg_op == ir::WORKGROUP_OP_INCLUSIVE_MAX
|| wg_op == ir::WORKGROUP_OP_EXCLUSIVE_MAX)
{
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
/* workaround QW datatype on CMP */
if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L){
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 1, 0),
- dst.offset(dst, 1, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 2, 0),
- dst.offset(dst, 2, 0), partialData);
- p->SEL_CMP(GEN_CONDITIONAL_GE, dst.offset(dst, 3, 0),
- dst.offset(dst, 3, 0), partialData);
- }
+ p->push();
+ p->curr.execWidth = 8;
+ p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
+ if (simd == 16) {
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->SEL_CMP(GEN_CONDITIONAL_GE, GenRegister::Qn(dst, 1),
+ GenRegister::Qn(dst, 1), GenRegister::Qn(partialData, 1));
+ }
+ p->pop();
+ } else
+ p->SEL_CMP(GEN_CONDITIONAL_GE, dst, dst, partialData);
}
}
--
2.7.4
More information about the Beignet
mailing list