[Beignet] [PATCH] GBE: optimize a special case of convert INT64 to float.
Zhigang Gong
zhigang.gong at intel.com
Thu Jun 5 01:30:31 PDT 2014
We found the following instruction sequence is common
in luxmark:
CVT.int64.uin32 %75 %74
LOADI.int64 %537 16777215
AND.int64 %76 %75 %537
CVT.float.uin64 %77 %76
Actually, the immediate value is a pure 32 bit value,
and the %74 is also a uint32 bit value. The AND instruction
will not touch the high 32 bit as well. So we can simply optimize
the above instruction series to the follow:
AND.uint32 %tmp %74 16777215
MOV.float %77 %tmp
This way, it will finally save about 55 instructions for each
of the above case. This patch could bring about 8% performance
gain with sala scene in luxmark.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 105 ++++++++++++++++++++++++-----
1 file changed, 89 insertions(+), 16 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 3530d2c..2d380e4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -103,6 +103,7 @@
#include "sys/cvar.hpp"
#include "sys/vector.hpp"
#include <algorithm>
+#include <climits>
namespace gbe
{
@@ -1809,8 +1810,10 @@ namespace gbe
}
/*! Call the child method with the proper prototype */
virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const {
- if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn))) {
- markAllChildren(dag);
+ bool markChildren = true;
+ if (static_cast<const T*>(this)->emitOne(sel, ir::cast<U>(dag.insn), markChildren)) {
+ if (markChildren)
+ markAllChildren(dag);
return true;
}
return false;
@@ -1839,7 +1842,7 @@ namespace gbe
return ir::TYPE_FLOAT;
}
- INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn) const {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::UnaryInstruction &insn, bool &markChildren) const {
const ir::Opcode opcode = insn.getOpcode();
const ir::Type insnType = insn.getType();
const GenRegister dst = sel.selReg(insn.getDst(0), getType(opcode, insnType));
@@ -2590,7 +2593,7 @@ namespace gbe
#define DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FAMILY) \
struct FAMILY##Pattern : public OneToManyPattern<FAMILY##Pattern, ir::FAMILY>\
{\
- INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn) const {\
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::FAMILY &insn, bool &markChildren) const {\
NOT_IMPLEMENTED;\
return false;\
}\
@@ -2601,7 +2604,7 @@ namespace gbe
/*! Load immediate pattern */
DECL_PATTERN(LoadImmInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadImmInstruction &insn, bool &markChildren) const
{
using namespace ir;
const Type type = insn.getType();
@@ -2649,7 +2652,7 @@ namespace gbe
/*! Sync instruction */
DECL_PATTERN(SyncInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn, bool &markChildren) const
{
using namespace ir;
const ir::Register reg = sel.reg(FAMILY_DWORD);
@@ -2821,7 +2824,7 @@ namespace gbe
sel.INDIRECT_MOVE(dst, src);
}
- INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn, bool &markChildren) const {
using namespace ir;
GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
const AddressSpace space = insn.getAddressSpace();
@@ -2939,7 +2942,7 @@ namespace gbe
}
}
- INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::StoreInstruction &insn, bool &markChildren) const
{
using namespace ir;
const AddressSpace space = insn.getAddressSpace();
@@ -3045,7 +3048,7 @@ namespace gbe
/*! Bit cast instruction pattern */
DECL_PATTERN(BitCastInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::BitCastInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::BitCastInstruction &insn, bool &markChildren) const
{
using namespace ir;
const Type dstType = insn.getDstType();
@@ -3154,7 +3157,48 @@ namespace gbe
/*! Convert instruction pattern */
DECL_PATTERN(ConvertInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn) const
+
+ INLINE bool lowerI64Reg(Selection::Opaque &sel, SelectionDAG *dag, GenRegister &src, uint32_t type) const {
+ using namespace ir;
+ GBE_ASSERT(type == GEN_TYPE_UD || type == GEN_TYPE_F);
+ if (dag->insn.getOpcode() == OP_LOADI) {
+ const auto &immInsn = cast<LoadImmInstruction>(dag->insn);
+ const auto imm = immInsn.getImmediate();
+ const Type immType = immInsn.getType();
+ if (immType == TYPE_S64 &&
+ imm.data.s64 <= INT_MAX &&
+ imm.data.s64 >= INT_MIN) {
+ src = GenRegister::immd((int32_t)imm.data.s64);
+ return true;
+ } else if (immType == TYPE_U64 &&
+ imm.data.u64 <= UINT_MAX) {
+ src = GenRegister::immud((uint32_t)imm.data.s64);
+ return true;
+ }
+ } else if (dag->insn.getOpcode() == OP_CVT) {
+ const auto cvtInsn = cast<ConvertInstruction>(dag->insn);
+ auto srcType = cvtInsn.getSrcType();
+ if (((srcType == TYPE_U32 || srcType == TYPE_S32) &&
+ (type == GEN_TYPE_UD || type == GEN_TYPE_D)) ||
+ ((srcType == TYPE_FLOAT) && type == GEN_TYPE_F)) {
+ src = GenRegister::retype(sel.selReg(cvtInsn.getSrc(0), srcType), type);
+ dag->isRoot = 1;
+ return true;
+ } else if (srcType == TYPE_FLOAT ||
+ srcType == TYPE_U16 ||
+ srcType == TYPE_S16 ||
+ srcType == TYPE_U32 ||
+ srcType == TYPE_S32) {
+ src = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32), type);
+ dag->isRoot = 1;
+ sel.MOV(src, sel.selReg(cvtInsn.getSrc(0), srcType));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::ConvertInstruction &insn, bool &markChildren) const
{
using namespace ir;
const Type dstType = insn.getDstType();
@@ -3225,6 +3269,35 @@ namespace gbe
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
} else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) {
+ auto dag = sel.regDAG[src.reg()];
+ SelectionDAG *dag0, *dag1;
+ if (dag->child[0]->insn.getOpcode() == OP_LOADI) {
+ dag0 = dag->child[1];
+ dag1 = dag->child[0];
+ } else {
+ dag0 = dag->child[0];
+ dag1 = dag->child[1];
+ }
+ GBE_ASSERT(!(dag->child[0]->insn.getOpcode() == OP_LOADI &&
+ dag->child[1]->insn.getOpcode() == OP_LOADI));
+ if (dag->insn.getOpcode() == OP_AND ||
+ dag->insn.getOpcode() == OP_OR ||
+ dag->insn.getOpcode() == OP_XOR) {
+ GenRegister src0;
+ GenRegister src1;
+ if (lowerI64Reg(sel, dag0, src0, GEN_TYPE_UD) &&
+ lowerI64Reg(sel, dag1, src1, GEN_TYPE_UD)) {
+ switch (dag->insn.getOpcode()) {
+ default:
+ case OP_AND: sel.AND(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+ case OP_OR: sel.OR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+ case OP_XOR: sel.XOR(GenRegister::retype(dst, GEN_TYPE_UD), src0, src1); break;
+ }
+ sel.MOV(dst, GenRegister::retype(dst, GEN_TYPE_UD));
+ markChildren = false;
+ return true;
+ }
+ }
GenRegister tmp[6];
for(int i=0; i<6; i++) {
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
@@ -3269,7 +3342,7 @@ namespace gbe
/*! Convert instruction pattern */
DECL_PATTERN(AtomicInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::AtomicInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::AtomicInstruction &insn, bool &markChildren) const
{
using namespace ir;
const AtomicOps atomicOp = insn.getAtomicOpcode();
@@ -3346,7 +3419,7 @@ namespace gbe
DECL_PATTERN(TernaryInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn) const {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::TernaryInstruction &insn, bool &markChildren) const {
using namespace ir;
const Type type = insn.getType();
const GenRegister dst = sel.selReg(insn.getDst(0), type),
@@ -3386,7 +3459,7 @@ namespace gbe
/*! Label instruction pattern */
DECL_PATTERN(LabelInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::LabelInstruction &insn, bool &markChildren) const
{
using namespace ir;
const LabelIndex label = insn.getLabelIndex();
@@ -3477,7 +3550,7 @@ namespace gbe
DECL_PATTERN(SampleInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::SampleInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::SampleInstruction &insn, bool &markChildren) const
{
using namespace ir;
GenRegister msgPayloads[4];
@@ -3522,7 +3595,7 @@ namespace gbe
/*! Typed write instruction pattern. */
DECL_PATTERN(TypedWriteInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn, bool &markChildren) const
{
using namespace ir;
const uint32_t simdWidth = sel.ctx.getSimdWidth();
@@ -3605,7 +3678,7 @@ namespace gbe
/*! get image info instruction pattern. */
DECL_PATTERN(GetImageInfoInstruction)
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn, bool &markChildren) const
{
using namespace ir;
GenRegister dst;
--
1.8.3.2
More information about the Beignet
mailing list