[Beignet] [PATCH] GBE: handle mad with execution width of one.
Ruiling Song
ruiling.song at intel.com
Thu Jul 7 07:18:08 UTC 2016
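The encoder now derives the execution size from curr.execWidth instead of
always using GEN_WIDTH_8, and sets dest_subreg_nr to dest.subnr / 4 rather
than dest.subnr / 16.
This fixes the following OpenCV test case failure under non-strict mode: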
./opencv_test_video --gtest_filter=OCL_Video/PyrLKOpticalFlow*
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen7_encoder.cpp | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen7_encoder.cpp b/backend/src/backend/gen7_encoder.cpp
index abb8b77..4f35491 100644
--- a/backend/src/backend/gen7_encoder.cpp
+++ b/backend/src/backend/gen7_encoder.cpp
@@ -175,6 +175,16 @@ namespace gbe
{
GenNativeInstruction *insn = this->next(opcode);
Gen7NativeInstruction *gen7_insn = &insn->gen7_insn;
+ int execution_size = 0;
+ if (this->curr.execWidth == 1) {
+ execution_size = GEN_WIDTH_1;
+ } else if (this->curr.execWidth == 8) {
+ execution_size = GEN_WIDTH_8;
+ } else if (this->curr.execWidth == 16) {
+ // Gen7 does not support SIMD16 alu3, still need to use SIMD8
+ execution_size = GEN_WIDTH_8;
+ } else
+ NOT_IMPLEMENTED;
assert(dest.file == GEN_GENERAL_REGISTER_FILE);
assert(dest.nr < 128);
@@ -182,11 +192,11 @@ namespace gbe
assert(dest.type = GEN_TYPE_F);
gen7_insn->bits1.da3src.dest_reg_file = 0;
gen7_insn->bits1.da3src.dest_reg_nr = dest.nr;
- gen7_insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
+ gen7_insn->bits1.da3src.dest_subreg_nr = dest.subnr / 4;
gen7_insn->bits1.da3src.dest_writemask = 0xf;
this->setHeader(insn);
gen7_insn->header.access_mode = GEN_ALIGN_16;
- gen7_insn->header.execution_size = GEN_WIDTH_8;
+ gen7_insn->header.execution_size = execution_size;
assert(src0.file == GEN_GENERAL_REGISTER_FILE);
assert(src0.address_mode == GEN_ADDRESS_DIRECT);
--
2.4.1
More information about the Beignet
mailing list