[Beignet] [PATCH V4] backend: add global immediate optimization
Wang, Rander
rander.wang at intel.com
Fri Jun 30 01:46:20 UTC 2017
Hi,
The abs of UD has to be done if it is encoded in instruction no matter it make sense or not.
And I have discussed with my collage and refine it.
First we inspect the HW behavior of ABS(UD), -(UD) and find that ABS(UD) = UD,
-(UD) = the result of -(UD) on CPU.
So the abs calculation can be removed and this will make it compiled pass.
Rander
-----Original Message-----
From: Ivan Shapovalov [mailto:intelfx at intelfx.name]
Sent: Wednesday, June 28, 2017 8:54 AM
To: Wang, Rander <rander.wang at intel.com>; beignet at lists.freedesktop.org
Cc: Song, Ruiling <ruiling.song at intel.com>
Subject: Re: [Beignet] [PATCH V4] backend: add global immediate optimization
On 2017-06-14 at 13:55 +0800, rander.wanga wrote:
> there are some global immediates in global var list of LLVM.
> these imm can be integrated in instructions. for
> compiler_global_immediate_optimized test
> in utest, there are two global immediates:
> L0:
> MOV(1) %42<0>:UD : 0x0:UD
> MOV(1) %43<0>:UD : 0x30:UD
>
> used by:
> ADD(16) %49<1>:D : %42<0,1,0>:D
> %48<8,8,1>:D
> ADD(16) %54<1>:D : %43<0,1,0>:D
> %53<8,8,1>:D
>
> it can be
> ADD(16) %49<1>:D : %48<8,8,1>:D 0x
> 0:UD
> ADD(16) %54<1>:D : %53<8,8,1>:D 0x
> 30:UD
>
> Then the MOV can be removed. And after this optimization, ADD 0 can
> be change
> to MOV, then local copy propagation can be done.
>
> V2: (1) add environment variable to enable/disable the optimization
> (2) refine the architecture of imm optimization, inherit from
> global
> optimizer not local block optimizer
>
> V3: merge with latest master driver
>
> V4: (1)refine some type errors
> (2)remove UD/D check for no need
> (3)refine imm calculate for UD/D
>
> Signed-off-by: rander.wang <rander.wang at intel.com>
> ---
> .../src/backend/gen_insn_selection_optimize.cpp | 367
> +++++++++++++++++++--
> 1 file changed, 342 insertions(+), 25 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp
> b/backend/src/backend/gen_insn_selection_optimize.cpp
> index 07547ec..eb93a20 100644
> --- a/backend/src/backend/gen_insn_selection_optimize.cpp
> +++ b/backend/src/backend/gen_insn_selection_optimize.cpp
> @@ -40,6 +40,33 @@ namespace gbe
> return elements;
> }
>
> + class ReplaceInfo
> + {
> + public:
> + ReplaceInfo(SelectionInstruction &insn,
> + const GenRegister &intermedia,
> + const GenRegister &replacement) : insn(insn),
> intermedia(intermedia), replacement(replacement)
> + {
> + assert(insn.opcode == SEL_OP_MOV || insn.opcode ==
> SEL_OP_ADD);
> + assert(&(insn.dst(0)) == &intermedia);
> + this->elements = CalculateElements(intermedia,
> insn.state.execWidth);
> + replacementOverwritten = false;
> + }
> + ~ReplaceInfo()
> + {
> + this->toBeReplaceds.clear();
> + }
> +
> + SelectionInstruction &insn;
> + const GenRegister &intermedia;
> + uint32_t elements;
> + const GenRegister &replacement;
> + set<GenRegister *> toBeReplaceds;
> + set<SelectionInstruction*> toBeReplacedInsns;
> + bool replacementOverwritten;
> + GBE_CLASS(ReplaceInfo);
> + };
> +
> class SelOptimizer
> {
> public:
> @@ -66,32 +93,7 @@ namespace gbe
>
> private:
> // local copy propagation
> - class ReplaceInfo
> - {
> - public:
> - ReplaceInfo(SelectionInstruction& insn,
> - const GenRegister& intermedia,
> - const GenRegister& replacement) :
> - insn(insn), intermedia(intermedia),
> replacement(replacement)
> - {
> - assert(insn.opcode == SEL_OP_MOV || insn.opcode ==
> SEL_OP_ADD);
> - assert(&(insn.dst(0)) == &intermedia);
> - this->elements = CalculateElements(intermedia,
> insn.state.execWidth);
> - replacementOverwritten = false;
> - }
> - ~ReplaceInfo()
> - {
> - this->toBeReplaceds.clear();
> - }
>
> - SelectionInstruction& insn;
> - const GenRegister& intermedia;
> - uint32_t elements;
> - const GenRegister& replacement;
> - set<GenRegister*> toBeReplaceds;
> - bool replacementOverwritten;
> - GBE_CLASS(ReplaceInfo);
> - };
> typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
> ReplaceInfoMap replaceInfoMap;
> void doLocalCopyPropagation();
> @@ -298,13 +300,328 @@ namespace gbe
> virtual void run();
> };
>
> + class SelGlobalImmMovOpt : public SelGlobalOptimizer {
> + public:
> + SelGlobalImmMovOpt(const GenContext& ctx, uint32_t features,
> intrusive_list<SelectionBlock> *blockList) :
> + SelGlobalOptimizer(ctx, features)
> + {
> + mblockList = blockList;
> + }
> +
> + virtual void run();
> +
> + void addToReplaceInfoMap(SelectionInstruction& insn);
> + void doGlobalCopyPropagation();
> + bool CanBeReplaced(const ReplaceInfo* info,
> SelectionInstruction& insn, const GenRegister& var);
> + void cleanReplaceInfoMap();
> + void doReplacement(ReplaceInfo* info);
> +
> + private:
> + intrusive_list<SelectionBlock> *mblockList;
> +
> + typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
> + ReplaceInfoMap replaceInfoMap;
> +
> + };
> +
> + extern void outputSelectionInst(SelectionInstruction &insn);
> +
> + void SelGlobalImmMovOpt::cleanReplaceInfoMap()
> + {
> + for (auto& pair : replaceInfoMap) {
> + ReplaceInfo* info = pair.second;
> + doReplacement(info);
> + delete info;
> + }
> + replaceInfoMap.clear();
> + }
> +
> +#define RESULT() switch(insn->opcode) \
> + { \
> + case SEL_OP_ADD: \
> + result = s0 + s1; \
> + break; \
> + case SEL_OP_MUL: \
> + result = s0 * s1; \
> + break; \
> + case SEL_OP_AND: \
> + result = s0 & s1; \
> + break; \
> + case SEL_OP_OR: \
> + result = s0 | s1; \
> + break; \
> + case SEL_OP_XOR: \
> + result = s0 ^ s1; \
> + break; \
> + default: \
> + assert(0); \
> + break; \
> + }
> +
> + void SelGlobalImmMovOpt::doReplacement(ReplaceInfo* info) {
> + for (GenRegister* reg : info->toBeReplaceds) {
> + GenRegister::propagateRegister(*reg, info->replacement);
> + }
> +
> + //after imm opt, maybe both src are imm, convert it to mov
> + for (SelectionInstruction* insn : info->toBeReplacedInsns) {
> + GenRegister& src0 = insn->src(0);
> + GenRegister& src1 = insn->src(1);
> + if (src0.file == GEN_IMMEDIATE_VALUE)
> + {
> + if (src1.file == GEN_IMMEDIATE_VALUE)
> + {
> + if (src0.type == GEN_TYPE_F)
> + {
> + float s0 = src0.value.f;
> + if (src0.absolute)
> + s0 = fabs(s0);
> + if (src0.negation)
> + s0 = -s0;
> +
> + float s1 = src1.value.f;
> + if (src1.absolute)
> + s1 = fabs(s1);
> + if (src1.negation)
> + s1 = -s1;
> +
> + float result;
> + switch(insn->opcode)
> + {
> + case SEL_OP_ADD:
> + result = s0 + s1;
> + break;
> + case SEL_OP_MUL:
> + result = s0 * s1;
> + break;
> + default:
> + assert(0);
> + break;
> + }
> +
> + insn->src(0) = GenRegister::immf(result);
> + }
> + else if (src0.type == GEN_TYPE_D && src1.type ==
> GEN_TYPE_D)
> + {
> + int s0 = src0.value.d;
> + if (src0.absolute)
> + s0 = abs(s0);
> + if (src0.negation)
> + s0 = -s0;
> +
> + int s1 = src1.value.d;
> + if (src1.absolute)
> + s1 = abs(s1);
> + if (src1.negation)
> + s1 = -s1;
> +
> + int result;
> + RESULT();
> + insn->src(0) = GenRegister::immd(result);
> + }
> + else if(src0.type == GEN_TYPE_UD || src1.type ==
> GEN_TYPE_UD)
> + {
> + unsigned int s0 = src0.value.ud;
> + if (src0.absolute)
> + s0 = abs(s0);
> + if (src0.negation)
> + s0 = -s0;
Hi,
are you sure this is OK? This piece of code (and the similar one directly below) doesn't compile here:
/tmp/makepkg/build/beignet-git/src/beignet/backend/src/backend/gen_insn_selection_optimize.cpp: In member function 'void gbe::SelGlobalImmMovOpt::doReplacement(gbe::ReplaceInfo*)':
/tmp/makepkg/build/beignet-git/src/beignet/backend/src/backend/gen_insn_selection_optimize.cpp:462:28: error: call of overloaded 'abs(unsigned int&)' is ambiguous
s0 = abs(s0);
I'm not familiar with the ISA that you work with, but the problem seems real (not a false positive), since either of your operands may not be a GEN_TYPE_UD.
Even if you work around the immediate problem by, say, reinterpret_casting to int& and back, the computation inside RESULT() will still yield nonsense if it's multiplication or division and either of the operands is a negative signed int.
(Please include me in replies as I'm not subscribed to the list.)
--
Ivan Shapovalov / intelfx /
> +
> + unsigned int s1 = src1.value.ud;
> + if (src1.absolute)
> + s1 = abs(s1);
> + if (src1.negation)
> + s1 = -s1;
> +
> + unsigned int result;
> + RESULT();
> + insn->src(0) = GenRegister::immud(result);
> + }
> + else
> + {
> + assert(0);
> + }
> +
> + insn->opcode = SEL_OP_MOV;
> + insn->srcNum = 1;
> + }
> + else
> + {
> + //src0 cant be immediate, so exchange with src1
> + GenRegister tmp;
> + tmp = src0;
> + src0 = src1;
> + src1 = tmp;
> + }
> + }
> + }
> +
> + info->insn.parent->insnList.erase(&(info->insn));
> + }
> +
> + void SelGlobalImmMovOpt::addToReplaceInfoMap(SelectionInstruction&
> insn)
> + {
> + assert(insn.opcode == SEL_OP_MOV);
> + const GenRegister& src = insn.src(0);
> + const GenRegister& dst = insn.dst(0);
> + if (src.type != dst.type)
> + return;
> +
> + ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
> + replaceInfoMap[dst.reg()] = info; }
> +
> + bool SelGlobalImmMovOpt::CanBeReplaced(const ReplaceInfo* info,
> SelectionInstruction& insn, const GenRegister& var)
> + {
> + if(var.file == GEN_IMMEDIATE_VALUE)
> + return false;
> +
> + switch(insn.opcode)
> + {
> + // imm source is supported by these instructions. And
> + // the src operators can be exchange in these instructions
> + case SEL_OP_ADD:
> + case SEL_OP_MUL:
> + case SEL_OP_AND:
> + case SEL_OP_OR:
> + case SEL_OP_XOR:
> + break;
> + default:
> + return false;
> + }
> +
> + if(info->intermedia.type != var.type)
> + {
> + assert(info->replacement.file == GEN_IMMEDIATE_VALUE);
> + if(!((var.type == GEN_TYPE_D && info->intermedia.type ==
> GEN_TYPE_UD)
> + || (var.type == GEN_TYPE_UD && info->intermedia.type ==
> GEN_TYPE_D)))
> + {
> + return false;
> + }
> + }
> +
> + if (info->intermedia.quarter == var.quarter &&
> + info->intermedia.subnr == var.subnr &&
> + info->intermedia.nr == var.nr)
> + {
> + uint32_t elements = CalculateElements(var,
> insn.state.execWidth); //considering width, hstrid, vstrid and
> execWidth
> + if (info->elements != elements)
> + return false;
> + }
> +
> +#ifdef DEBUG_GLOBAL_IMM_OPT
> + outputSelectionInst(insn);
> +#endif
> +
> + return true;
> + }
> +
> + void SelGlobalImmMovOpt::doGlobalCopyPropagation()
> + {
> + for(SelectionBlock &block : *mblockList)
> + {
> + for (SelectionInstruction &insn :block.insnList)
> + {
> + for (uint8_t i = 0; i < insn.srcNum; ++i)
> + {
> + ReplaceInfoMap::iterator it =
> replaceInfoMap.find(insn.src(i).reg());
> + if (it != replaceInfoMap.end())
> + {
> + ReplaceInfo *info = it->second;
> + if (CanBeReplaced(info, insn, insn.src(i)))
> + {
> + info->toBeReplaceds.insert(&insn.src(i));
> + info->toBeReplacedInsns.insert(&insn);
> + }
> + else
> + {
> + replaceInfoMap.erase(it);
> + delete info;
> + }
> + }
> + }
> +
> + for (uint8_t i = 0; i < insn.dstNum; ++i)
> + {
> + ReplaceInfoMap::iterator it =
> replaceInfoMap.find(insn.dst(i).reg());
> + if (it != replaceInfoMap.end())
> + {
> + ReplaceInfo *info = it->second;
> + if(&(info->insn) != &insn)
> + {
> + replaceInfoMap.erase(it);
> + delete info;
> + }
> + }
> + }
> + }
> +
> + if(replaceInfoMap.empty())
> + break;
> + }
> +
> + cleanReplaceInfoMap();
> + }
> +
> + void SelGlobalImmMovOpt::run()
> + {
> + bool canbeOpt = false;
> +
> + /*global immediates are set in entry block, the following is
> example of GEN IR
> +
> + decl_input.global %41 dst
> + ## 0 output register ##
> + ## 0 pushed register
> + ## 3 blocks ##
> + LABEL $0
> + LOADI.uint32 %42 0
> + LOADI.uint32 %43 48
> +
> + LABEL $1
> + MUL.int32 %44 %3 %12
> + ADD.int32 %49 %42 %48
> + ...
> + */
> + SelectionBlock &block = *mblockList->begin();
> + for(SelectionInstruction &insn : block.insnList)
> + {
> + GenRegister src0 = insn.src(0);
> + if(insn.opcode == SEL_OP_MOV &&
> + src0.file == GEN_IMMEDIATE_VALUE &&
> + (src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_D ||
> src0.type == GEN_TYPE_F))
> + {
> + addToReplaceInfoMap(insn);
> + canbeOpt = true;
> +
> +#ifdef DEBUG_GLOBAL_IMM_OPT
> + outputSelectionInst(insn);
> +#endif
> + }
> + }
> +
> + if(!canbeOpt) return;
> +
> + doGlobalCopyPropagation();
> + }
> +
> void SelGlobalOptimizer::run()
> {
>
> }
>
> + BVAR(OCL_GLOBAL_IMM_OPTIMIZATION, true);
> +
> void Selection::optimize()
> {
> + //do global imm opt first to make more local opt
> + if(OCL_GLOBAL_IMM_OPTIMIZATION)
> + {
> + SelGlobalImmMovOpt gopt(getCtx(), opt_features, blockList);
> + gopt.run();
> + }
> +
> //do basic block level optimization
> for (SelectionBlock &block : *blockList) {
> SelBasicBlockOptimizer bbopt(getCtx(),
> getCtx().getLiveOut(block.bb), opt_features, block);
More information about the Beignet
mailing list