[Beignet] [PATCH] backend: refine the unpack operation to 64bit register
rander.wang
rander.wang at intel.com
Thu Apr 20 07:27:07 UTC 2017
The register layout of a long or double differs between Gen7x and Gen8x
or later. For Gen7x, the layout of long or double is like:
L0L1L2L3L4L5L6L7
H0H1H2H3H4H5H6H7,
but for Gen8x or later, the layout is:
H0L0H1L1H2L2H3L3
H4L4H5L5H6L6H7L7
Now define a dedicated interface class to deal with this case.
Signed-off-by: rander.wang <rander.wang at intel.com>
---
backend/src/CMakeLists.txt | 1 +
backend/src/backend/gen8_context.cpp | 28 +----------
backend/src/backend/gen_context.cpp | 51 ++++++++++----------
backend/src/backend/gen_context.hpp | 3 ++
backend/src/backend/gen_encoder.cpp | 8 ++--
backend/src/backend/gen_encoder.hpp | 5 ++
backend/src/backend/gen_program.cpp | 9 ++++
backend/src/backend/gen_reg_relayout.cpp | 81 ++++++++++++++++++++++++++++++++
backend/src/backend/gen_reg_relayout.hpp | 54 +++++++++++++++++++++
9 files changed, 184 insertions(+), 56 deletions(-)
create mode 100644 backend/src/backend/gen_reg_relayout.cpp
create mode 100644 backend/src/backend/gen_reg_relayout.hpp
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 7c1f4db..5a0f580 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -144,6 +144,7 @@ set (GBE_SRC
backend/gen8_encoder.cpp
backend/gen9_encoder.hpp
backend/gen9_encoder.cpp
+ backend/gen_reg_relayout.cpp
)
set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 34baee8..b49e3a5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -790,33 +790,7 @@ namespace gbe
void Gen8Context::emitI64DIVREMInstruction(const SelectionInstruction &cnst_insn)
{
- SelectionInstruction* insn = const_cast<SelectionInstruction*>(&cnst_insn);
- GenRegister packed_src0 = ra->genReg(insn->src(0));
- GenRegister packed_src1 = ra->genReg(insn->src(1));
- GenRegister dst = ra->genReg(insn->dst(0));
- int tmp_reg_n = 14;
-
- if (packed_src0.hstride != GEN_HORIZONTAL_STRIDE_0) {
- GenRegister unpacked_src0 = ra->genReg(insn->dst(tmp_reg_n));
- unpackLongVec(packed_src0, unpacked_src0, p->curr.execWidth);
- tmp_reg_n++;
- insn->src(0) = unpacked_src0;
- }
- if (packed_src1.hstride != GEN_HORIZONTAL_STRIDE_0) {
- GenRegister unpacked_src1 = ra->genReg(insn->dst(tmp_reg_n));
- unpackLongVec(packed_src1, unpacked_src1, p->curr.execWidth);
- tmp_reg_n++;
- insn->src(1) = unpacked_src1;
- }
- GBE_ASSERT(tmp_reg_n <= insn->dstNum);
-
- GenContext::emitI64DIVREMInstruction(*insn);
-
- if (dst.hstride != GEN_HORIZONTAL_STRIDE_0) {
- GenRegister dst_packed = ra->genReg(insn->dst(14));
- packLongVec(dst, dst_packed, p->curr.execWidth);
- p->MOV(dst, dst_packed);
- }
+ GenContext::emitI64DIVREMInstruction(cnst_insn);
}
void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 3ef8a4d..f62f4d0 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -71,6 +71,7 @@ namespace gbe
GBE_SAFE_DELETE(sel);
GBE_SAFE_DELETE(p);
this->p = generateEncoder();
+ this->p->setReg64UnpackMethod(iRegUpk);
this->newSelection();
this->ra = GBE_NEW(GenRegAllocator, *this);
this->branchPos2.clear();
@@ -294,7 +295,7 @@ namespace gbe
case SEL_OP_BFREV: p->BFREV(dst, src); break;
case SEL_OP_CONVI64_TO_I:
{
- p->MOV(dst, src.bottom_half());
+ p->MOV(dst, iRegUpk->LowDW(src));
break;
}
case SEL_OP_BRC:
@@ -361,11 +362,11 @@ namespace gbe
p->MOV(middle, src);
}
- p->MOV(dst.bottom_half(), middle);
+ p->MOV(iRegUpk->LowDW(dst), middle);
if(src.is_signed_int())
- p->ASR(dst.top_half(this->simdWidth), middle, GenRegister::immud(31));
+ p->ASR(iRegUpk->HighDW(dst, this->simdWidth), middle, GenRegister::immud(31));
else
- p->MOV(dst.top_half(this->simdWidth), GenRegister::immud(0));
+ p->MOV(iRegUpk->HighDW(dst, this->simdWidth), GenRegister::immud(0));
break;
}
case SEL_OP_BSWAP: {
@@ -621,8 +622,8 @@ namespace gbe
switch (insn.opcode) {
case SEL_OP_I64ADD: {
tmp = GenRegister::retype(tmp, GEN_TYPE_UL);
- GenRegister x = tmp.bottom_half();
- GenRegister y = tmp.top_half(this->simdWidth);
+ GenRegister x = iRegUpk->LowDW(tmp);
+ GenRegister y = iRegUpk->HighDW(tmp, this->simdWidth);
loadBottomHalf(x, src0);
loadBottomHalf(y, src1);
@@ -637,8 +638,8 @@ namespace gbe
}
case SEL_OP_I64SUB: {
tmp = GenRegister::retype(tmp, GEN_TYPE_UL);
- GenRegister x = tmp.bottom_half();
- GenRegister y = tmp.top_half(this->simdWidth);
+ GenRegister x = iRegUpk->LowDW(tmp);
+ GenRegister y = iRegUpk->HighDW(tmp, this->simdWidth);
loadBottomHalf(x, src0);
loadBottomHalf(y, src1);
@@ -751,8 +752,8 @@ namespace gbe
case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
case SEL_OP_SEL_INT64:
{
- p->SEL(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
- p->SEL(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
+ p->SEL(iRegUpk->LowDW(dst), iRegUpk->LowDW(src0), iRegUpk->LowDW(src1));
+ p->SEL(iRegUpk->HighDW(dst, this->simdWidth), iRegUpk->HighDW(src0, this->simdWidth), iRegUpk->HighDW(src1, this->simdWidth));
}
break;
case SEL_OP_AND: p->AND(dst, src0, src1, insn.extra.function); break;
@@ -760,20 +761,20 @@ namespace gbe
case SEL_OP_XOR: p->XOR(dst, src0, src1, insn.extra.function); break;
case SEL_OP_I64AND:
{
- p->AND(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
- p->AND(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
+ p->AND(iRegUpk->LowDW(dst), iRegUpk->LowDW(src0), iRegUpk->LowDW(src1));
+ p->AND(iRegUpk->HighDW(dst, this->simdWidth), iRegUpk->HighDW(src0, this->simdWidth), iRegUpk->HighDW(src1, this->simdWidth));
}
break;
case SEL_OP_I64OR:
{
- p->OR(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
- p->OR(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
+ p->OR(iRegUpk->LowDW(dst), iRegUpk->LowDW(src0), iRegUpk->LowDW(src1));
+ p->OR(iRegUpk->HighDW(dst, this->simdWidth), iRegUpk->HighDW(src0, this->simdWidth), iRegUpk->HighDW(src1, this->simdWidth));
}
break;
case SEL_OP_I64XOR:
{
- p->XOR(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
- p->XOR(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
+ p->XOR(iRegUpk->LowDW(dst), iRegUpk->LowDW(src0), iRegUpk->LowDW(src1));
+ p->XOR(iRegUpk->HighDW(dst, this->simdWidth), iRegUpk->HighDW(src0, this->simdWidth), iRegUpk->HighDW(src1, this->simdWidth));
}
break;
case SEL_OP_SHR: p->SHR(dst, src0, src1); break;
@@ -789,8 +790,8 @@ namespace gbe
GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
- p->MOV(xdst.top_half(this->simdWidth), xsrc0.bottom_half());
- p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
+ p->MOV(iRegUpk->HighDW(xdst, this->simdWidth), iRegUpk->LowDW(xsrc0));
+ p->MOV(iRegUpk->LowDW(xdst), iRegUpk->LowDW(xsrc1));
}
break;
default: NOT_IMPLEMENTED;
@@ -801,7 +802,7 @@ namespace gbe
p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->AND(dest, src.bottom_half(), GenRegister::immud(63));
+ p->AND(dest, iRegUpk->LowDW(src), GenRegister::immud(63));
p->pop();
}
@@ -1557,19 +1558,19 @@ namespace gbe
}
void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
- p->MOV(dest, src.top_half(this->simdWidth));
+ p->MOV(dest, iRegUpk->HighDW(src, this->simdWidth));
}
void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
- p->MOV(dest.top_half(this->simdWidth), src);
+ p->MOV(iRegUpk->HighDW(dest, this->simdWidth), src);
}
void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
- p->MOV(dest, src.bottom_half());
+ p->MOV(dest, iRegUpk->LowDW(src));
}
void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
- p->MOV(dest.bottom_half(), src);
+ p->MOV(iRegUpk->LowDW(dest), src);
}
void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
@@ -3504,11 +3505,11 @@ namespace gbe
void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& data,
GenRegister& src, uint32_t bti, bool useSends) {
- p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half());
+ p->MOV(GenRegister::retype(data, GEN_TYPE_UD), iRegUpk->LowDW(src));
p->UNTYPED_WRITE(addr, data, GenRegister::immud(bti), 1, useSends);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
- p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this->simdWidth));
+ p->MOV(GenRegister::retype(data, GEN_TYPE_UD), iRegUpk->HighDW(src, this->simdWidth));
p->UNTYPED_WRITE(addr, data, GenRegister::immud(bti), 1, useSends);
p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
}
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 269b16a..21939ec 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -29,6 +29,7 @@
#include "backend/gen7_encoder.hpp"
#include "backend/program.h"
#include "backend/gen_register.hpp"
+#include "backend/gen_reg_relayout.hpp"
#include "ir/function.hpp"
#include "ir/liveness.hpp"
#include "sys/map.hpp"
@@ -226,11 +227,13 @@ namespace gbe
uint32_t reservedSpillRegs;
bool limitRegisterPressure;
bool relaxMath;
+ IGenReg64Unpack *iRegUpk;
bool getIFENDIFFix(void) const { return ifEndifFix; }
void setIFENDIFFix(bool fix) { ifEndifFix = fix; }
bool getProfilingMode(void) const { return inProfilingMode; }
void setProfilingMode(bool b) { inProfilingMode = b; }
CompileErrorCode getErrCode() { return errCode; }
+ void setRegUnpackMethod(IGenReg64Unpack *iReg) {iRegUpk = iReg;};
protected:
virtual GenEncoder* generateEncoder(void) {
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 217a2d8..3bf50b9 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -700,8 +700,8 @@ namespace gbe
bool GenEncoder::canHandleLong(uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1)
{
/* By now, just alu1 insn will come to here. So just MOV */
- this->MOV(dst.bottom_half(), src0.bottom_half());
- this->MOV(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth));
+ this->MOV(iRegUpk->LowDW(dst), iRegUpk->LowDW(src0));
+ this->MOV(iRegUpk->HighDW(dst, this->simdWidth), iRegUpk->HighDW(src0, this->simdWidth));
return true;
}
@@ -819,8 +819,8 @@ namespace gbe
void GenEncoder::LOAD_INT64_IMM(GenRegister dest, GenRegister value) {
GenRegister u0 = GenRegister::immd((int)value.value.i64), u1 = GenRegister::immd(value.value.i64 >> 32);
- MOV(dest.bottom_half(), u0);
- MOV(dest.top_half(this->simdWidth), u1);
+ MOV(iRegUpk->LowDW(dest), u0);
+ MOV(iRegUpk->HighDW(dest, this->simdWidth), u1);
}
void GenEncoder::F16TO32(GenRegister dest, GenRegister src0) {
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 040b94a..5067ce5 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -53,6 +53,7 @@
#include "backend/gen_defs.hpp"
#include "backend/gen_register.hpp"
+#include "backend/gen_reg_relayout.hpp"
#include "sys/platform.hpp"
#include "sys/vector.hpp"
#include <cassert>
@@ -90,6 +91,8 @@ namespace gbe
uint32_t simdWidth;
DebugInfo DBGInfo;
vector<DebugInfo> storedbg;
+ IGenReg64Unpack *iRegUpk;
+
void setDBGInfo(DebugInfo in, bool hasHigh);
////////////////////////////////////////////////////////////////////////
// Encoding functions
@@ -307,6 +310,8 @@ namespace gbe
virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t ow_size);
/*! A64 OBlock write */
virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t ow_size);
+ /* Set the helper used to unpack the different 64-bit register layouts. */
+ void setReg64UnpackMethod(IGenReg64Unpack *iReg) {iRegUpk = iReg;}
GBE_CLASS(GenEncoder); //!< Use custom allocators
virtual void alu3(uint32_t opcode, GenRegister dst,
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 073ede6..12078dc 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -53,6 +53,7 @@
#include "backend/gen_defs.hpp"
#include "backend/gen/gen_mesa_disasm.h"
#include "backend/gen_reg_allocation.hpp"
+#include "backend/gen_reg_relayout.hpp"
#include "ir/unit.hpp"
#ifdef GBE_COMPILER_AVAILABLE
@@ -163,6 +164,7 @@ namespace gbe {
uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]);
uint32_t codeGen = 0;
GenContext *ctx = NULL;
+ IGenReg64Unpack *iReg64Unpack = NULL;
if (fn->getSimdWidth() == 8) {
codeGen = 1;
} else if (fn->getSimdWidth() == 16) {
@@ -191,6 +193,13 @@ namespace gbe {
}
GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
+ if(IS_IVYBRIDGE(deviceID) || IS_HASWELL(deviceID))
+ iReg64Unpack = GBE_NEW(IGenReg64Unpack);
+ else
+ iReg64Unpack = GBE_NEW(IGen8Reg64Unpack);
+
+ ctx->setRegUnpackMethod(iReg64Unpack);
+
if (profiling) {
ctx->setProfilingMode(true);
unit.getProfilingInfo()->setDeviceID(deviceID);
diff --git a/backend/src/backend/gen_reg_relayout.cpp b/backend/src/backend/gen_reg_relayout.cpp
new file mode 100644
index 0000000..a01f4ac
--- /dev/null
+++ b/backend/src/backend/gen_reg_relayout.cpp
@@ -0,0 +1,81 @@
+/*
+ Copyright (C) Intel Corp. 2017. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+#include "backend/gen_reg_relayout.hpp"
+
+namespace gbe
+{
+
+ GenRegister IGenReg64Unpack::HighDW(GenRegister reg, int simd)
+ {
+ reg.type = (reg.type == GEN_TYPE_UL) ? GEN_TYPE_UD : GEN_TYPE_D;
+
+ if (reg.hstride != GEN_HORIZONTAL_STRIDE_0){
+ reg.subnr += simd * typeSize(reg.type) * reg.hstride;
+ reg.nr += reg.subnr / 32;
+ reg.subnr %= 32;
+ } else {
+ reg.subnr += typeSize(reg.type);
+ reg.nr += reg.subnr/32;
+ reg.subnr %= 32;
+ }
+
+ return reg;
+ }
+
+ GenRegister IGenReg64Unpack::LowDW(GenRegister reg)
+ {
+ reg.type = (reg.type == GEN_TYPE_UL) ? GEN_TYPE_UD : GEN_TYPE_D;
+ return reg;
+ }
+
+ GenRegister IGen8Reg64Unpack::HighDW(GenRegister reg, int simd)
+ {
+ reg.type = (reg.type == GEN_TYPE_UL) ? GEN_TYPE_UD : GEN_TYPE_D;
+
+ reg.subnr += typeSize(reg.type);
+ reg.nr += reg.subnr/32;
+ reg.subnr %= 32;
+
+ reg.hstride = GEN_HORIZONTAL_STRIDE_2;
+ reg.width = GEN_WIDTH_4;
+ reg.vstride = GEN_VERTICAL_STRIDE_8;
+
+ return reg;
+ }
+
+ GenRegister IGen8Reg64Unpack::LowDW(GenRegister reg)
+ {
+ reg.type = (reg.type == GEN_TYPE_UL) ? GEN_TYPE_UD : GEN_TYPE_D;
+ reg.hstride = GEN_HORIZONTAL_STRIDE_2;
+ reg.width = GEN_WIDTH_4;
+ reg.vstride = GEN_VERTICAL_STRIDE_8;
+
+ return reg;
+ }
+
+} /* End of namespace gbe. */
diff --git a/backend/src/backend/gen_reg_relayout.hpp b/backend/src/backend/gen_reg_relayout.hpp
new file mode 100644
index 0000000..592fd5e
--- /dev/null
+++ b/backend/src/backend/gen_reg_relayout.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/**
+ * \file gen_reg_relayout.hpp
+ */
+#ifndef __GBE_GEN_REG_RELAYOUT_HPP__
+#define __GBE_GEN_REG_RELAYOUT_HPP__
+
+#include "backend/gen_register.hpp"
+
+namespace gbe
+{
+ /*for Gen7x, the layout of long or double is like:
+ L0L1L2L3L4L5L6L7
+ H0H1H2H3H4H5H6H7,
+
+ but for Gen8x or later, the layout is:
+ H0L0H1L1H2L2H3L3
+ H4L4H5L5H6L6H7L7
+
+ The unpack classes below provide a common interface over both layouts.
+ */
+ class IGenReg64Unpack
+ {
+ public:
+ virtual GenRegister HighDW(GenRegister reg, int simd);
+ virtual GenRegister LowDW(GenRegister reg);
+ };
+
+ class IGen8Reg64Unpack : public IGenReg64Unpack
+ {
+ public:
+ virtual GenRegister HighDW(GenRegister reg, int simd);
+ virtual GenRegister LowDW(GenRegister reg);
+ };
+
+}
+#endif /* __GBE_GEN_REG_RELAYOUT_HPP__ */
--
2.7.4
More information about the Beignet
mailing list