[Beignet] [PATCH V2 3/3] add local copy propagation optimization for each basic block
Guo Yejun
yejun.guo at intel.com
Sun Sep 27 12:23:01 PDT 2015
It is done at selection ir level, it removes MOV instructions
and helps to reduce the register pressure.
For instructions like:
MOV(8) %42<2>:UB : %53<32,8,4>:UB
ADD(8) %43<2>:B : %40<16,8,2>:B -%42<16,8,2>:B
can be optimized as:
ADD(8) %43<2>:UB : %56<32,8,4>:UB -%53<32,8,4>:UB
v2: move propagateRegister() as static mothod of GenRegister
refine replaceInfo from set to map
encapsulate function CanBeReplaced
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 4 +
backend/src/backend/gen_insn_selection.hpp | 2 +
.../src/backend/gen_insn_selection_optimize.cpp | 203 ++++++++++++++++++++-
backend/src/backend/gen_register.hpp | 21 +++
4 files changed, 225 insertions(+), 5 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 28c7ed2..038b839 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2062,6 +2062,10 @@ namespace gbe
///////////////////////////////////////////////////////////////////////////
// Code selection public implementation
///////////////////////////////////////////////////////////////////////////
+ const GenContext& Selection::getCtx()
+ {
+ return this->opaque->ctx;
+ }
Selection::Selection(GenContext &ctx) {
this->blockList = NULL;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 86542b0..275eb9c 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -271,6 +271,8 @@ namespace gbe
void optimize(void);
uint32_t opt_features;
+ const GenContext &getCtx();
+
/*! Use custom allocators */
GBE_CLASS(Selection);
};
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index c82fbe5..3f2ae2f 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -12,38 +12,231 @@
namespace gbe
{
+ //helper functions
+ static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
+ {
+ uint32_t elements = 0;
+ uint32_t elementSize = typeSize(reg.type);
+ uint32_t width = GenRegister::width_size(reg);
+ assert(execWidth >= width);
+ uint32_t height = execWidth / width;
+ uint32_t vstride = GenRegister::vstride_size(reg);
+ uint32_t hstride = GenRegister::hstride_size(reg);
+ uint32_t base = reg.subnr;
+ for (uint32_t i = 0; i < height; ++i) {
+ uint32_t offsetInByte = base;
+ for (uint32_t j = 0; j < width; ++j) {
+ uint32_t offsetInType = offsetInByte / elementSize;
+ elements |= (1 << offsetInType);
+ offsetInByte += hstride * elementSize;
+ }
+ offsetInByte += vstride * elementSize;
+ }
+ return elements;
+ }
class SelOptimizer
{
public:
- SelOptimizer(uint32_t features) : features(features) {}
+ SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), features(features) {}
virtual void run() = 0;
virtual ~SelOptimizer() {}
protected:
+ const GenContext &ctx; //in case that we need it
uint32_t features;
};
class SelBasicBlockOptimizer : public SelOptimizer
{
public:
- SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : SelOptimizer(features), bb(bb) {}
+ SelBasicBlockOptimizer(const GenContext& ctx,
+ const ir::Liveness::LiveOut& liveout,
+ uint32_t features,
+ SelectionBlock &bb) :
+ SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false)
+ {
+ }
~SelBasicBlockOptimizer() {}
virtual void run();
private:
+ // local copy propagation
+ class ReplaceInfo
+ {
+ public:
+ ReplaceInfo(SelectionInstruction& insn,
+ const GenRegister& intermedia,
+ const GenRegister& replacement) :
+ insn(insn), intermedia(intermedia), replacement(replacement)
+ {
+ assert(insn.opcode == SEL_OP_MOV);
+ assert(&(insn.src(0)) == &replacement);
+ assert(&(insn.dst(0)) == &intermedia);
+ this->elements = CalculateElements(intermedia, insn.state.execWidth);
+ replacementOverwritten = false;
+ }
+ ~ReplaceInfo()
+ {
+ this->toBeReplaceds.clear();
+ }
+
+ SelectionInstruction& insn;
+ const GenRegister& intermedia;
+ uint32_t elements;
+ const GenRegister& replacement;
+ set<GenRegister*> toBeReplaceds;
+ bool replacementOverwritten;
+ GBE_CLASS(ReplaceInfo);
+ };
+ typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
+ ReplaceInfoMap replaceInfoMap;
+ void doLocalCopyPropagation();
+ void addToReplaceInfoMap(SelectionInstruction& insn);
+ void changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var);
+ void removeFromReplaceInfoMap(const GenRegister& var);
+ void doReplacement(ReplaceInfo* info);
+ bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var);
+ void cleanReplaceInfoMap();
+
SelectionBlock &bb;
- static const size_t MaxTries = 1; //the times for optimization
+ const ir::Liveness::LiveOut& liveout;
+ bool optimized;
+ static const size_t MaxTries = 1; //the max times of optimization try
};
+ void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info)
+ {
+ for (GenRegister* reg : info->toBeReplaceds) {
+ GenRegister::propagateRegister(*reg, info->replacement);
+ }
+ bb.insnList.erase(&(info->insn));
+ optimized = true;
+ }
+
+ void SelBasicBlockOptimizer::cleanReplaceInfoMap()
+ {
+ for (auto& pair : replaceInfoMap) {
+ ReplaceInfo* info = pair.second;
+ doReplacement(info);
+ delete info;
+ }
+ replaceInfoMap.clear();
+ }
+
+ void SelBasicBlockOptimizer::removeFromReplaceInfoMap(const GenRegister& var)
+ {
+ for (ReplaceInfoMap::iterator pos = replaceInfoMap.begin(); pos != replaceInfoMap.end(); ++pos) {
+ ReplaceInfo* info = pos->second;
+ if (info->intermedia.reg() == var.reg()) { //intermedia is overwritten
+ if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
+ //the whole intermedia is overwritten, so, do replacement for the scanned IRs
+ doReplacement(info);
+ }
+ replaceInfoMap.erase(pos);
+ delete info;
+ return;
+ }
+ if (info->replacement.reg() == var.reg()) { //replacement is overwritten
+ info->replacementOverwritten = true;
+ return;
+ }
+ }
+ }
+
+ void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& insn)
+ {
+ assert(insn.opcode == SEL_OP_MOV);
+ const GenRegister& src = insn.src(0);
+ const GenRegister& dst = insn.dst(0);
+ if (src.type != dst.type || src.file != dst.file)
+ return;
+
+ if (liveout.find(dst.reg()) != liveout.end())
+ return;
+
+ ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
+ replaceInfoMap[dst.reg()] = info;
+ }
+
+ bool SelBasicBlockOptimizer::CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var)
+ {
+ //some conditions here are very strict, while some conditions are very light
+ //the reason is that i'm unable to find a perfect condition now in the first version
+ //need to refine the conditions when debugging/optimizing real kernels
+
+ if (insn.opcode == SEL_OP_BSWAP) //should remove once bswap issue is fixed
+ return false;
+
+ if (insn.isWrite() || insn.isRead()) //register in selection vector
+ return false;
+
+ if (info->replacementOverwritten)
+ return false;
+
+ if (info->insn.state.noMask == 0 && insn.state.noMask == 1)
+ return false;
+
+ if (info->insn.state.predicate != insn.state.predicate && info->insn.state.predicate != GEN_PREDICATE_NONE)
+ return false;
+
+ if (info->intermedia.type == var.type && info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
+ uint32_t elements = CalculateElements(var, insn.state.execWidth); //considering width, hstrid, vstrid and execWidth
+ if (info->elements == elements)
+ return true;
+ }
+
+ return false;
+ }
+
+ void SelBasicBlockOptimizer::changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var)
+ {
+ ReplaceInfoMap::iterator it = replaceInfoMap.find(var.reg());
+ if (it != replaceInfoMap.end()) { //same ir register
+ ReplaceInfo* info = it->second;
+ if (CanBeReplaced(info, insn, var)) {
+ info->toBeReplaceds.insert(&var);
+ } else {
+ //if it is the same ir register, but could not be replaced for some reason,
+ //that means we could not remove MOV instruction, and so no replacement,
+ //so we'll remove the info for this case.
+ replaceInfoMap.erase(it);
+ delete info;
+ }
+ }
+ }
+
+ void SelBasicBlockOptimizer::doLocalCopyPropagation()
+ {
+ for (SelectionInstruction &insn : bb.insnList) {
+ for (uint8_t i = 0; i < insn.srcNum; ++i)
+ changeInsideReplaceInfoMap(insn, insn.src(i));
+
+ for (uint8_t i = 0; i < insn.dstNum; ++i)
+ removeFromReplaceInfoMap(insn.dst(i));
+
+ if (insn.opcode == SEL_OP_MOV)
+ addToReplaceInfoMap(insn);
+ }
+ cleanReplaceInfoMap();
+ }
+
void SelBasicBlockOptimizer::run()
{
+ for (size_t i = 0; i < MaxTries; ++i) {
+ optimized = false;
+ doLocalCopyPropagation();
+ //doOtherLocalOptimization();
+
+ if (!optimized)
+ break; //break since no optimization found at this round
+ }
}
class SelGlobalOptimizer : public SelOptimizer
{
public:
- SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {}
+ SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : SelOptimizer(ctx, features) {}
~SelGlobalOptimizer() {}
virtual void run();
};
@@ -57,7 +250,7 @@ namespace gbe
{
//do basic block level optimization
for (SelectionBlock &block : *blockList) {
- SelBasicBlockOptimizer bbopt(opt_features, block);
+ SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
bbopt.run();
}
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index a1bcd5a..0f5d7cf 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -1212,6 +1212,27 @@ namespace gbe
return reg;
}
+ static INLINE void propagateRegister(GenRegister& dst, const GenRegister& src)
+ {
+ dst.type = src.type;
+ dst.file = src.file;
+ dst.physical = src.physical;
+ dst.subphysical = src.subphysical;
+ dst.value.reg = src.value.reg;
+ dst.vstride = src.vstride;
+ dst.width = src.width;
+ dst.hstride = src.hstride;
+ dst.quarter = src.quarter;
+ dst.nr = src.nr;
+ dst.subnr = src.subnr;
+ dst.address_mode = src.address_mode;
+ dst.a0_subnr = src.a0_subnr;
+ dst.addr_imm = src.addr_imm;
+
+ dst.negation = dst.negation ^ src.negation;
+ dst.absolute = dst.absolute | src.absolute;
+ }
+
/*! Generate register encoding with run-time simdWidth */
#define DECL_REG_ENCODER(NAME, SIMD16, SIMD8, SIMD1) \
template <typename... Args> \
--
1.9.1
More information about the Beignet
mailing list