[Beignet] [Patch V2 1/3] OCL20/GBE: Fix 64bit pointer issue in Load store instruction selection.
Luo, Xionghu
xionghu.luo at intel.com
Sun Dec 6 19:25:25 PST 2015
This patchset LGTM.
Since it works only for BDW or later, need add assert in runtime for HSW or earlier platforms in future.
Luo Xionghu
Best Regards
-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Yang Rong
Sent: Friday, December 4, 2015 4:31 PM
To: beignet at lists.freedesktop.org
Cc: Song, Ruiling
Subject: [Beignet] [Patch V2 1/3] OCL20/GBE: Fix 64bit pointer issue in Load store instruction selection.
From: Ruiling Song <ruiling.song at intel.com>
previously we do not handle 64bit pointer correctly.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 79 ++++++++++++++++++++++--------
1 file changed, 59 insertions(+), 20 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 7a23892..1d4fcda 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3720,7 +3720,13 @@ namespace gbe
if (sel.isScalarReg(addr.reg())) {
sel.curr.noMask = 1;
}
- sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
+ if (sel.getRegisterFamily(addr.reg())) {
+ // as we still use offset instead of absolut graphics address,
+ // it is safe to convert from u64 to u32
+ GenRegister t = convertU64ToU32(sel, addr);
+ sel.SHR(addrDW, t, GenRegister::immud(2));
+ } else
+ sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD),
+ GenRegister::immud(2));
sel.pop();
sel.DWORD_GATHER(dst, addrDW, BTI_CONSTANT); @@ -3797,6 +3803,7 @@ namespace gbe
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
bool isUniform = sel.isScalarReg(insn.getValue(0));
+ unsigned addrBytes = typeSize(addr.type);
AddressMode AM = insn.getAddressMode();
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
sel.push();
@@ -3814,7 +3821,10 @@ namespace gbe
read64Legacy(sel, addr, dst, b, btiTemp);
} else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) {
GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? 0xfe : BTI_CONSTANT);
- read64Legacy(sel, addr, dst, b, btiTemp);
+ GenRegister addrDW = addr;
+ if (addrBytes == 8)
+ addrDW = convertU64ToU32(sel, addr);
+ read64Legacy(sel, addrDW, dst, b, btiTemp);
} else {
read64Stateless(sel, addr, dst);
}
@@ -3830,9 +3840,12 @@ namespace gbe
ir::AddressSpace addrSpace) const
{
using namespace ir;
- Register tmpReg = sel.reg(FAMILY_DWORD);
- GenRegister tmpAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+ Type addrType = getType(addrFamily);
+ Register tmpReg = sel.reg(FAMILY_DWORD, isUniform);
+ GenRegister tmpAddr = sel.selReg(sel.reg(addrFamily,
+ isUniform), addrType);
GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
+ GenRegister addrOffset = sel.selReg(sel.reg(FAMILY_DWORD,
+ isUniform), ir::TYPE_U32);
// Get dword aligned addr
sel.push();
@@ -3840,7 +3853,11 @@ namespace gbe
sel.curr.noMask = 1;
sel.curr.execWidth = 1;
}
- sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+ if (addrFamily == FAMILY_DWORD)
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+ else
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UL),
+ GenRegister::immuint64(0xfffffffffffffffc));
+
sel.pop();
sel.push();
vector<GenRegister> tmp;
@@ -3852,9 +3869,13 @@ namespace gbe
if (isUniform)
sel.curr.execWidth = 1;
// Get the remaining offset from aligned addr
- sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
- sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
- sel.SHR(tmpData, tmpData, tmpAddr);
+ if (addrFamily == FAMILY_QWORD) {
+ sel.AND(addrOffset, sel.unpacked_ud(address.reg()), GenRegister::immud(0x3));
+ } else {
+ sel.AND(addrOffset, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+ }
+ sel.SHL(addrOffset, addrOffset, GenRegister::immud(0x3));
+ sel.SHR(tmpData, tmpData, addrOffset);
if (elemSize == GEN_BYTE_SCATTER_WORD)
sel.MOV(GenRegister::retype(dst, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg, isUniform, sel.isLongReg(tmpReg))); @@ -3908,6 +3929,7 @@ namespace gbe
using namespace ir;
GBE_ASSERT(effectData.size() == effectDataNum);
GBE_ASSERT(tmp.size() == effectDataNum + 1);
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
sel.push();
Register alignedFlag = sel.reg(FAMILY_BOOL, isUniform);
GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); @@ -3916,7 +3938,12 @@ namespace gbe
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3));
+ if (addrFamily == FAMILY_QWORD) {
+ GenRegister t = convertU64ToU32(sel, address);
+ sel.AND(shiftL, t, GenRegister::immud(0x3));
+ } else {
+ sel.AND(shiftL, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+ }
sel.SHL(shiftL, shiftL, GenRegister::immud(0x3));
sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32));
sel.curr.physicalFlag = 0;
@@ -4026,6 +4053,8 @@ namespace gbe
1 : sel.ctx.getSimdWidth();
const bool isUniform = simdWidth == 1;
RegisterFamily family = getFamily(insn.getValueType());
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+ Type addrType = getType(addrFamily);
if(valueNum > 1) {
GBE_ASSERT(!isUniform && "vector load should not be uniform. Something went wrong."); @@ -4041,11 +4070,14 @@ namespace gbe
for(uint32_t i = 0; i < effectDataNum + 1; i++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
- GenRegister alignedAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+ GenRegister alignedAddr = sel.selReg(sel.reg(addrFamily,
+ isUniform), addrType);
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+ if (addrFamily == FAMILY_DWORD)
+ sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+ else
+ sel.AND(alignedAddr, GenRegister::retype(address,
+ GEN_TYPE_UL), GenRegister::immuint64(~0x3ul));
sel.pop();
uint32_t remainedReg = effectDataNum + 1; @@ -4057,7 +4089,10 @@ namespace gbe
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+ if (addrFamily == FAMILY_DWORD)
+ sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+ else
+ sel.ADD(alignedAddr, alignedAddr,
+ GenRegister::immuint64(pos * 4));
sel.pop();
}
shootUntypedReadMsg(sel, insn, t1, alignedAddr, width, addrSpace); @@ -4110,7 +4145,8 @@ namespace gbe
{
using namespace ir;
const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn);
- GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+ Register reg = insn.getAddressRegister();
+ GenRegister address = sel.selReg(reg,
+ getType(sel.getRegisterFamily(reg)));
GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
insn.getAddressSpace() == MEM_CONSTANT ||
insn.getAddressSpace() == MEM_PRIVATE || @@ -4198,7 +4234,6 @@ namespace gbe
for (unsigned k = 0; k < (valueNum+1)/2+1; k++) {
msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
}
- bool valueScalar = sel.isScalarReg(value[0].reg());
sel.push();
/* do first quarter */
sel.curr.execWidth = 8;
@@ -4232,8 +4267,6 @@ namespace gbe
AddressMode AM = insn.getAddressMode();
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
- bool addrScalar = sel.isScalarReg(address.reg());
-
if (AM == AM_DynamicBti || AM == AM_StaticBti) {
if (AM == AM_DynamicBti) {
Register btiReg = insn.getBtiReg(); @@ -4347,6 +4380,7 @@ namespace gbe
src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
AddressMode AM = insn.getAddressMode();
+ unsigned int addrBytes = typeSize(address.type);
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
if (AM != AM_Stateless) {
GenRegister b;
@@ -4356,9 +4390,13 @@ namespace gbe
b = GenRegister::immud(insn.getSurfaceIndex());
}
write64Legacy(sel, address, src, b, btiTemp);
- } else if (addrSpace == MEM_CONSTANT || addrSpace == MEM_LOCAL) {
- GenRegister b = GenRegister::immud(addrSpace == MEM_CONSTANT ? BTI_CONSTANT : 0xfe);
- write64Legacy(sel, address, src, b, btiTemp);
+ } else if (addrSpace == MEM_LOCAL) {
+ GenRegister b = GenRegister::immud(0xfe);
+ GenRegister addr = address;
+ if (addrBytes == 8) {
+ addr = convertU64ToU32(sel, address);
+ }
+ write64Legacy(sel, addr, src, b, btiTemp);
} else {
GBE_ASSERT(sel.hasLongType());
write64Stateless(sel, address, src); @@ -4480,7 +4518,8 @@ namespace gbe
{
using namespace ir;
const ir::StoreInstruction &insn = cast<ir::StoreInstruction>(dag.insn);
- GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+ Register reg = insn.getAddressRegister();
+ GenRegister address = sel.selReg(reg,
+ getType(sel.getRegisterFamily(reg)));
AddressSpace addrSpace = insn.getAddressSpace();
const Type type = insn.getValueType();
const uint32_t elemSize = getByteScatterGatherSize(sel, type);
--
1.9.1
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list