[Beignet] [PATCH] backend: add global immediate optimization
rander.wang
rander.wang at intel.com
Sat May 27 02:42:32 UTC 2017
there are some global immediates in global var list of LLVM.
these imm can be integrated in instructions. for compiler_global_immediate_optimized test
in utest, there are two global immediates:
L0:
MOV(1) %42<0>:UD : 0x0:UD
MOV(1) %43<0>:UD : 0x30:UD
used by:
ADD(16) %49<1>:D : %42<0,1,0>:D %48<8,8,1>:D
ADD(16) %54<1>:D : %43<0,1,0>:D %53<8,8,1>:D
it can be
ADD(16) %49<1>:D : %48<8,8,1>:D 0x0:UD
ADD(16) %54<1>:D : %53<8,8,1>:D 0x30:UD
Then the MOV can be removed. And after this optimization, ADD 0 can be change
to MOV, then local copy propagation can be done.
Signed-off-by: rander.wang <rander.wang at intel.com>
---
.../src/backend/gen_insn_selection_optimize.cpp | 278 ++++++++++++++++++++-
1 file changed, 277 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index d2e0fb9..bcaa583 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -64,7 +64,7 @@ namespace gbe
~SelBasicBlockOptimizer() {}
virtual void run();
- private:
+ protected:
// local copy propagation
class ReplaceInfo
{
@@ -90,6 +90,7 @@ namespace gbe
uint32_t elements;
const GenRegister& replacement;
set<GenRegister*> toBeReplaceds;
+ set<SelectionInstruction*> toBeReplacedInsns;
bool replacementOverwritten;
GBE_CLASS(ReplaceInfo);
};
@@ -114,6 +115,99 @@ namespace gbe
for (GenRegister* reg : info->toBeReplaceds) {
GenRegister::propagateRegister(*reg, info->replacement);
}
+
+ //after imm opt, maybe both src are imm, convert it to mov
+ for (SelectionInstruction* insn : info->toBeReplacedInsns) {
+ GenRegister& src0 = insn->src(0);
+ GenRegister& src1 = insn->src(1);
+ if (src0.file == GEN_IMMEDIATE_VALUE)
+ {
+ if (src1.file == GEN_IMMEDIATE_VALUE)
+ {
+ if (src0.type == GEN_TYPE_F)
+ {
+ float s0 = src0.value.f;
+ if (src0.absolute)
+ s0 = fabs(s0);
+ if (src0.negation)
+ s0 = -s0;
+
+ float s1 = src1.value.f;
+ if (src1.absolute)
+ s1 = fabs(s1);
+ if (src1.negation)
+ s1 = -s1;
+
+ float result;
+ switch(insn->opcode)
+ {
+ case SEL_OP_ADD:
+ result = s0 + s1;
+ break;
+ case SEL_OP_MUL:
+ result = s0 * s1;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ insn->src(0) = GenRegister::immf(result);
+ }
+ else if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD)
+ {
+ int s0 = src0.value.d;
+ if (src0.absolute)
+ s0 = fabs(s0);
+ if (src0.negation)
+ s0 = -s0;
+
+ int s1 = src1.value.d;
+ if (src1.absolute)
+ s1 = fabs(s1);
+ if (src1.negation)
+ s1 = -s1;
+
+ int result;
+ switch(insn->opcode)
+ {
+ case SEL_OP_ADD:
+ result = s0 + s1;
+ break;
+ case SEL_OP_MUL:
+ result = s0 * s1;
+ break;
+ case SEL_OP_AND:
+ result = s0 & s1;
+ break;
+ case SEL_OP_OR:
+ result = s0 | s1;
+ break;
+ case SEL_OP_XOR:
+ result = s0 ^ s1;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ insn->src(0) = GenRegister::immd(result);
+ }
+
+ insn->opcode = SEL_OP_MOV;
+ insn->srcNum = 1;
+ }
+ else
+ {
+ //src0 cant be immediate, so exchange with src1
+ GenRegister tmp;
+ tmp = src0;
+ src0 = src1;
+ src1 = tmp;
+ }
+ }
+ }
+
bb.insnList.erase(&(info->insn));
optimized = true;
}
@@ -274,6 +368,184 @@ namespace gbe
virtual void run();
};
+ class SelGlobalImmMovOpt : public SelBasicBlockOptimizer
+ {
+ public:
+ SelGlobalImmMovOpt(const GenContext& ctx, uint32_t features, intrusive_list<SelectionBlock> *blockList) :
+ SelBasicBlockOptimizer(ctx, ctx.getLiveOut((*blockList->begin()).bb), features, *blockList->begin())
+ {
+ mblockList = blockList;
+ }
+
+ virtual void run();
+
+ void addToReplaceInfoMap(SelectionInstruction& insn);
+ void doGlobalCopyPropagation();
+ bool CanBeReplaced(const ReplaceInfo* info, SelectionInstruction& insn, const GenRegister& var);
+
+ private:
+ intrusive_list<SelectionBlock> *mblockList;
+ };
+
+ extern void outputSelectionInst(SelectionInstruction &insn);
+
+ void SelGlobalImmMovOpt::addToReplaceInfoMap(SelectionInstruction& insn)
+ {
+ assert(insn.opcode == SEL_OP_MOV);
+ const GenRegister& src = insn.src(0);
+ const GenRegister& dst = insn.dst(0);
+ if (src.type != dst.type)
+ return;
+
+ ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
+ replaceInfoMap[dst.reg()] = info;
+ }
+
+ bool SelGlobalImmMovOpt::CanBeReplaced(const ReplaceInfo* info, SelectionInstruction& insn, const GenRegister& var)
+ {
+ switch(insn.opcode)
+ {
+ // imm source is supported by these instructions. And
+ // the src operators can be exchange in these instructions
+ case SEL_OP_ADD:
+ case SEL_OP_MUL:
+ case SEL_OP_AND:
+ case SEL_OP_OR:
+ case SEL_OP_XOR:
+ break;
+ default:
+ return false;
+ }
+
+ if(info->intermedia.type != var.type)
+ {
+ bool isOk = false;
+ assert(info->replacement.file == GEN_IMMEDIATE_VALUE);
+ if(var.type == GEN_TYPE_D && info->intermedia.type == GEN_TYPE_UD)
+ {
+ if(info->replacement.value.ud >= 0x80000000)
+ return false;
+ isOk = true;
+ }
+
+ if(var.type == GEN_TYPE_UD && info->intermedia.type == GEN_TYPE_D)
+ {
+ if(info->replacement.value.d < 0)
+ return false;
+ isOk = true;
+ }
+
+ if(var.file == GEN_IMMEDIATE_VALUE)
+ isOk = false;
+
+ if(!isOk) return false;
+ }
+
+ if (info->intermedia.quarter == var.quarter &&
+ info->intermedia.subnr == var.subnr &&
+ info->intermedia.nr == var.nr)
+ {
+ uint32_t elements = CalculateElements(var, insn.state.execWidth); //considering width, hstrid, vstrid and execWidth
+ if (info->elements != elements)
+ return false;
+ }
+
+#ifdef DEBUG_GLOBAL_IMM_OPT
+ outputSelectionInst(insn);
+#endif
+
+ return true;
+ }
+
+ void SelGlobalImmMovOpt::doGlobalCopyPropagation()
+ {
+ for(SelectionBlock &block : *mblockList)
+ {
+ for (SelectionInstruction &insn :block.insnList)
+ {
+ for (uint8_t i = 0; i < insn.srcNum; ++i)
+ {
+ ReplaceInfoMap::iterator it = replaceInfoMap.find(insn.src(i).reg());
+ if (it != replaceInfoMap.end())
+ {
+ ReplaceInfo *info = it->second;
+ if (CanBeReplaced(info, insn, insn.src(i)))
+ {
+ info->toBeReplaceds.insert(&insn.src(i));
+ info->toBeReplacedInsns.insert(&insn);
+ }
+ else
+ {
+ replaceInfoMap.erase(it);
+ delete info;
+ }
+ }
+ }
+
+ for (uint8_t i = 0; i < insn.dstNum; ++i)
+ {
+ ReplaceInfoMap::iterator it = replaceInfoMap.find(insn.dst(i).reg());
+ if (it != replaceInfoMap.end())
+ {
+ ReplaceInfo *info = it->second;
+ if(&(info->insn) != &insn)
+ {
+ replaceInfoMap.erase(it);
+ delete info;
+ }
+ }
+ }
+ }
+
+ if(replaceInfoMap.empty())
+ break;
+ }
+
+ cleanReplaceInfoMap();
+ }
+
+ void SelGlobalImmMovOpt::run()
+ {
+ bool canbeOpt = false;
+
+ /*global immediates are set in entry block, the following is example of GEN IR
+
+ decl_input.global %41 dst
+ ## 0 output register ##
+ ## 0 pushed register
+ ## 3 blocks ##
+ LABEL $0
+ LOADI.uint32 %42 0
+ LOADI.uint32 %43 48
+
+ LABEL $1
+ MUL.int32 %44 %3 %12
+ ADD.int32 %49 %42 %48
+ ...
+ */
+ SelectionBlock &block = *mblockList->begin();
+ for(SelectionInstruction &insn : block.insnList)
+ {
+ GenRegister src0 = insn.src(0);
+ if(insn.opcode == SEL_OP_MOV &&
+ src0.file == GEN_IMMEDIATE_VALUE &&
+ (src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) &&
+ insn.state.predicate == GEN_PREDICATE_NONE)
+ {
+ addToReplaceInfoMap(insn);
+ canbeOpt = true;
+
+#ifdef DEBUG_GLOBAL_IMM_OPT
+ outputSelectionInst(insn);
+#endif
+ }
+ }
+
+ if(!canbeOpt) return;
+
+ doGlobalCopyPropagation();
+ }
+
void SelGlobalOptimizer::run()
{
@@ -281,6 +553,10 @@ namespace gbe
void Selection::optimize()
{
+ //do global imm opt first to make more local opt
+ SelGlobalImmMovOpt gopt(getCtx(), opt_features, blockList);
+ gopt.run();
+
//do basic block level optimization
for (SelectionBlock &block : *blockList) {
SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
--
2.7.4
More information about the Beignet
mailing list