[Beignet] [PATCH V2 3/3] add local copy propagation optimization for each basic block
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Sep 28 22:25:36 PDT 2015
LGTM.
Thanks.
On Mon, Sep 28, 2015 at 03:23:01AM +0800, Guo Yejun wrote:
> It is done at selection ir level, it removes MOV instructions
> and helps to reduce the register pressure.
>
> For instructions like:
> MOV(8) %42<2>:UB : %53<32,8,4>:UB
> ADD(8) %43<2>:B : %40<16,8,2>:B -%42<16,8,2>:B
> can be optimized as:
> ADD(8) %43<2>:UB : %56<32,8,4>:UB -%53<32,8,4>:UB
>
> v2: move propagateRegister() as static mothod of GenRegister
> refine replaceInfo from set to map
> encapsulate function CanBeReplaced
>
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> backend/src/backend/gen_insn_selection.cpp | 4 +
> backend/src/backend/gen_insn_selection.hpp | 2 +
> .../src/backend/gen_insn_selection_optimize.cpp | 203 ++++++++++++++++++++-
> backend/src/backend/gen_register.hpp | 21 +++
> 4 files changed, 225 insertions(+), 5 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 28c7ed2..038b839 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2062,6 +2062,10 @@ namespace gbe
> ///////////////////////////////////////////////////////////////////////////
> // Code selection public implementation
> ///////////////////////////////////////////////////////////////////////////
> + const GenContext& Selection::getCtx()
> + {
> + return this->opaque->ctx;
> + }
>
> Selection::Selection(GenContext &ctx) {
> this->blockList = NULL;
> diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
> index 86542b0..275eb9c 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -271,6 +271,8 @@ namespace gbe
> void optimize(void);
> uint32_t opt_features;
>
> + const GenContext &getCtx();
> +
> /*! Use custom allocators */
> GBE_CLASS(Selection);
> };
> diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
> index c82fbe5..3f2ae2f 100644
> --- a/backend/src/backend/gen_insn_selection_optimize.cpp
> +++ b/backend/src/backend/gen_insn_selection_optimize.cpp
> @@ -12,38 +12,231 @@
>
> namespace gbe
> {
> + //helper functions
> + static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
> + {
> + uint32_t elements = 0;
> + uint32_t elementSize = typeSize(reg.type);
> + uint32_t width = GenRegister::width_size(reg);
> + assert(execWidth >= width);
> + uint32_t height = execWidth / width;
> + uint32_t vstride = GenRegister::vstride_size(reg);
> + uint32_t hstride = GenRegister::hstride_size(reg);
> + uint32_t base = reg.subnr;
> + for (uint32_t i = 0; i < height; ++i) {
> + uint32_t offsetInByte = base;
> + for (uint32_t j = 0; j < width; ++j) {
> + uint32_t offsetInType = offsetInByte / elementSize;
> + elements |= (1 << offsetInType);
> + offsetInByte += hstride * elementSize;
> + }
> + offsetInByte += vstride * elementSize;
> + }
> + return elements;
> + }
>
> class SelOptimizer
> {
> public:
> - SelOptimizer(uint32_t features) : features(features) {}
> + SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), features(features) {}
> virtual void run() = 0;
> virtual ~SelOptimizer() {}
> protected:
> + const GenContext &ctx; //in case that we need it
> uint32_t features;
> };
>
> class SelBasicBlockOptimizer : public SelOptimizer
> {
> public:
> - SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : SelOptimizer(features), bb(bb) {}
> + SelBasicBlockOptimizer(const GenContext& ctx,
> + const ir::Liveness::LiveOut& liveout,
> + uint32_t features,
> + SelectionBlock &bb) :
> + SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false)
> + {
> + }
> ~SelBasicBlockOptimizer() {}
> virtual void run();
>
> private:
> + // local copy propagation
> + class ReplaceInfo
> + {
> + public:
> + ReplaceInfo(SelectionInstruction& insn,
> + const GenRegister& intermedia,
> + const GenRegister& replacement) :
> + insn(insn), intermedia(intermedia), replacement(replacement)
> + {
> + assert(insn.opcode == SEL_OP_MOV);
> + assert(&(insn.src(0)) == &replacement);
> + assert(&(insn.dst(0)) == &intermedia);
> + this->elements = CalculateElements(intermedia, insn.state.execWidth);
> + replacementOverwritten = false;
> + }
> + ~ReplaceInfo()
> + {
> + this->toBeReplaceds.clear();
> + }
> +
> + SelectionInstruction& insn;
> + const GenRegister& intermedia;
> + uint32_t elements;
> + const GenRegister& replacement;
> + set<GenRegister*> toBeReplaceds;
> + bool replacementOverwritten;
> + GBE_CLASS(ReplaceInfo);
> + };
> + typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
> + ReplaceInfoMap replaceInfoMap;
> + void doLocalCopyPropagation();
> + void addToReplaceInfoMap(SelectionInstruction& insn);
> + void changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var);
> + void removeFromReplaceInfoMap(const GenRegister& var);
> + void doReplacement(ReplaceInfo* info);
> + bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var);
> + void cleanReplaceInfoMap();
> +
> SelectionBlock &bb;
> - static const size_t MaxTries = 1; //the times for optimization
> + const ir::Liveness::LiveOut& liveout;
> + bool optimized;
> + static const size_t MaxTries = 1; //the max times of optimization try
> };
>
> + void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info)
> + {
> + for (GenRegister* reg : info->toBeReplaceds) {
> + GenRegister::propagateRegister(*reg, info->replacement);
> + }
> + bb.insnList.erase(&(info->insn));
> + optimized = true;
> + }
> +
> + void SelBasicBlockOptimizer::cleanReplaceInfoMap()
> + {
> + for (auto& pair : replaceInfoMap) {
> + ReplaceInfo* info = pair.second;
> + doReplacement(info);
> + delete info;
> + }
> + replaceInfoMap.clear();
> + }
> +
> + void SelBasicBlockOptimizer::removeFromReplaceInfoMap(const GenRegister& var)
> + {
> + for (ReplaceInfoMap::iterator pos = replaceInfoMap.begin(); pos != replaceInfoMap.end(); ++pos) {
> + ReplaceInfo* info = pos->second;
> + if (info->intermedia.reg() == var.reg()) { //intermedia is overwritten
> + if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
> + //the whole intermedia is overwritten, so, do replacement for the scanned IRs
> + doReplacement(info);
> + }
> + replaceInfoMap.erase(pos);
> + delete info;
> + return;
> + }
> + if (info->replacement.reg() == var.reg()) { //replacement is overwritten
> + info->replacementOverwritten = true;
> + return;
> + }
> + }
> + }
> +
> + void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& insn)
> + {
> + assert(insn.opcode == SEL_OP_MOV);
> + const GenRegister& src = insn.src(0);
> + const GenRegister& dst = insn.dst(0);
> + if (src.type != dst.type || src.file != dst.file)
> + return;
> +
> + if (liveout.find(dst.reg()) != liveout.end())
> + return;
> +
> + ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
> + replaceInfoMap[dst.reg()] = info;
> + }
> +
> + bool SelBasicBlockOptimizer::CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var)
> + {
> + //some conditions here are very strict, while some conditions are very light
> + //the reason is that i'm unable to find a perfect condition now in the first version
> + //need to refine the conditions when debugging/optimizing real kernels
> +
> + if (insn.opcode == SEL_OP_BSWAP) //should remove once bswap issue is fixed
> + return false;
> +
> + if (insn.isWrite() || insn.isRead()) //register in selection vector
> + return false;
> +
> + if (info->replacementOverwritten)
> + return false;
> +
> + if (info->insn.state.noMask == 0 && insn.state.noMask == 1)
> + return false;
> +
> + if (info->insn.state.predicate != insn.state.predicate && info->insn.state.predicate != GEN_PREDICATE_NONE)
> + return false;
> +
> + if (info->intermedia.type == var.type && info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
> + uint32_t elements = CalculateElements(var, insn.state.execWidth); //considering width, hstrid, vstrid and execWidth
> + if (info->elements == elements)
> + return true;
> + }
> +
> + return false;
> + }
> +
> + void SelBasicBlockOptimizer::changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var)
> + {
> + ReplaceInfoMap::iterator it = replaceInfoMap.find(var.reg());
> + if (it != replaceInfoMap.end()) { //same ir register
> + ReplaceInfo* info = it->second;
> + if (CanBeReplaced(info, insn, var)) {
> + info->toBeReplaceds.insert(&var);
> + } else {
> + //if it is the same ir register, but could not be replaced for some reason,
> + //that means we could not remove MOV instruction, and so no replacement,
> + //so we'll remove the info for this case.
> + replaceInfoMap.erase(it);
> + delete info;
> + }
> + }
> + }
> +
> + void SelBasicBlockOptimizer::doLocalCopyPropagation()
> + {
> + for (SelectionInstruction &insn : bb.insnList) {
> + for (uint8_t i = 0; i < insn.srcNum; ++i)
> + changeInsideReplaceInfoMap(insn, insn.src(i));
> +
> + for (uint8_t i = 0; i < insn.dstNum; ++i)
> + removeFromReplaceInfoMap(insn.dst(i));
> +
> + if (insn.opcode == SEL_OP_MOV)
> + addToReplaceInfoMap(insn);
> + }
> + cleanReplaceInfoMap();
> + }
> +
> void SelBasicBlockOptimizer::run()
> {
> + for (size_t i = 0; i < MaxTries; ++i) {
> + optimized = false;
>
> + doLocalCopyPropagation();
> + //doOtherLocalOptimization();
> +
> + if (!optimized)
> + break; //break since no optimization found at this round
> + }
> }
>
> class SelGlobalOptimizer : public SelOptimizer
> {
> public:
> - SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {}
> + SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : SelOptimizer(ctx, features) {}
> ~SelGlobalOptimizer() {}
> virtual void run();
> };
> @@ -57,7 +250,7 @@ namespace gbe
> {
> //do basic block level optimization
> for (SelectionBlock &block : *blockList) {
> - SelBasicBlockOptimizer bbopt(opt_features, block);
> + SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
> bbopt.run();
> }
>
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index a1bcd5a..0f5d7cf 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -1212,6 +1212,27 @@ namespace gbe
> return reg;
> }
>
> + static INLINE void propagateRegister(GenRegister& dst, const GenRegister& src)
> + {
> + dst.type = src.type;
> + dst.file = src.file;
> + dst.physical = src.physical;
> + dst.subphysical = src.subphysical;
> + dst.value.reg = src.value.reg;
> + dst.vstride = src.vstride;
> + dst.width = src.width;
> + dst.hstride = src.hstride;
> + dst.quarter = src.quarter;
> + dst.nr = src.nr;
> + dst.subnr = src.subnr;
> + dst.address_mode = src.address_mode;
> + dst.a0_subnr = src.a0_subnr;
> + dst.addr_imm = src.addr_imm;
> +
> + dst.negation = dst.negation ^ src.negation;
> + dst.absolute = dst.absolute | src.absolute;
> + }
> +
> /*! Generate register encoding with run-time simdWidth */
> #define DECL_REG_ENCODER(NAME, SIMD16, SIMD8, SIMD1) \
> template <typename... Args> \
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list