[Beignet] [PATCH V2 3/3] add local copy propagation optimization for each basic block

Zhigang Gong zhigang.gong at linux.intel.com
Wed Sep 23 18:13:39 PDT 2015


On Fri, Sep 11, 2015 at 08:50:49AM +0800, Guo Yejun wrote:
> It is done at selection ir level, it removes MOV instructions
> and helps to reduce the register pressure.
> 
> For instructions like:
>     MOV(8)              %42<2>:UB	:	%53<32,8,4>:UB
>     ADD(8)              %43<2>:B	:	%40<16,8,2>:B	-%42<16,8,2>:B
> can be optimized as:
>     ADD(8)              %43<2>:UB	:	%56<32,8,4>:UB	-%53<32,8,4>:UB
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp         |   4 +
>  backend/src/backend/gen_insn_selection.hpp         |   2 +
>  .../src/backend/gen_insn_selection_optimize.cpp    | 201 ++++++++++++++++++++-
>  3 files changed, 202 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 28c7ed2..038b839 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2062,6 +2062,10 @@ namespace gbe
>    ///////////////////////////////////////////////////////////////////////////
>    // Code selection public implementation
>    ///////////////////////////////////////////////////////////////////////////
> +  const GenContext& Selection::getCtx()
> +  {
> +    return this->opaque->ctx;
> +  }
>  
>    Selection::Selection(GenContext &ctx) {
>      this->blockList = NULL;
> diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
> index 86542b0..275eb9c 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -271,6 +271,8 @@ namespace gbe
>      void optimize(void);
>      uint32_t opt_features;
>  
> +    const GenContext &getCtx();
> +
>      /*! Use custom allocators */
>      GBE_CLASS(Selection);
>    };
> diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
> index c82fbe5..1c2f559 100644
> --- a/backend/src/backend/gen_insn_selection_optimize.cpp
> +++ b/backend/src/backend/gen_insn_selection_optimize.cpp
> @@ -12,38 +12,229 @@
>  
>  namespace gbe
>  {
> +  //helper functions
> +  static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
> +  {
> +    uint32_t elements = 0;
> +    uint32_t elementSize = typeSize(reg.type);
> +    uint32_t width = GenRegister::width_size(reg);
> +    assert(execWidth >= width);
> +    uint32_t height = execWidth / width;
> +    uint32_t vstride = GenRegister::vstride_size(reg);
> +    uint32_t hstride = GenRegister::hstride_size(reg);
> +    uint32_t base = reg.subnr;
> +    for (uint32_t i = 0; i < height; ++i) {
> +      uint32_t offsetInByte = base;
> +      for (uint32_t j = 0; j < width; ++j) {
> +        uint32_t offsetInType = offsetInByte / elementSize;
> +        elements |= (1 << offsetInType);
> +        offsetInByte += hstride * elementSize;
> +      }
> +      offsetInByte += vstride * elementSize;
> +    }
> +    return elements;
> +  }
>  
>    class SelOptimizer
>    {
>    public:
> -    SelOptimizer(uint32_t features) : features(features) {}
> +    SelOptimizer(const GenContext& ctx, uint32_t features) : ctx(ctx), features(features) {}
>      virtual void run() = 0;
>      virtual ~SelOptimizer() {}
>    protected:
> +    const GenContext &ctx;      //in case that we need it
>      uint32_t features;
>    };
>  
>    class SelBasicBlockOptimizer : public SelOptimizer
>    {
>    public:
> -    SelBasicBlockOptimizer(uint32_t features, SelectionBlock &bb) : SelOptimizer(features), bb(bb) {}
> +    SelBasicBlockOptimizer(const GenContext& ctx,
> +                           const ir::Liveness::LiveOut& liveout,
> +                           uint32_t features,
> +                           SelectionBlock &bb) :
> +        SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false)
> +    {
> +    }
>      ~SelBasicBlockOptimizer() {}
>      virtual void run();
>  
>    private:
> +    // local copy propagation
> +    class ReplaceInfo
> +    {
> +    public:
> +      ReplaceInfo(SelectionInstruction& insn,
> +                  const GenRegister& intermedia,
> +                  const GenRegister& replacement,
> +                  uint32_t execWidth) :
> +                  insn(insn), intermedia(intermedia), replacement(replacement)
> +      {
> +        assert(insn.opcode == SEL_OP_MOV);
> +        assert(&(insn.src(0)) == &replacement);
> +        assert(&(insn.dst(0)) == &intermedia);
> +        this->elements = CalculateElements(intermedia, execWidth);
> +        replacementChanged = false;
> +      }
> +      ~ReplaceInfo()
> +      {
> +        this->toBeReplacements.clear();
> +      }
> +
> +      SelectionInstruction& insn;
> +      const GenRegister& intermedia;
> +      uint32_t elements;
> +      const GenRegister& replacement;
> +      set<GenRegister*> toBeReplacements;
> +      bool replacementChanged;
> +      GBE_CLASS(ReplaceInfo);
> +    };
> +    set<ReplaceInfo*> replaceInfos;
> +    void doLocalCopyPropagation();
> +    void addToReplaceInfos(SelectionInstruction& insn);
> +    void changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var);
> +    void removeFromReplaceInfos(const GenRegister& var);
> +    void doReplacement(ReplaceInfo* info);
> +    void cleanReplaceInfos();
> +    static void propagateRegister(GenRegister& dst, const GenRegister& src);
> +
>      SelectionBlock &bb;
> -    static const size_t MaxTries = 1;   //the times for optimization
> +    const ir::Liveness::LiveOut& liveout;
> +    bool optimized;
> +    static const size_t MaxTries = 1;   //the max times of optimization try
>    };
>  
> +  void SelBasicBlockOptimizer::propagateRegister(GenRegister& dst, const GenRegister& src)
This function looks much like a GenRegister static method, because it doesn't reference any
external information.
> +  {
> +    dst.type = src.type;
> +    dst.file = src.file;
> +    dst.physical = src.physical;
> +    dst.subphysical = src.subphysical;
> +    dst.value.reg = src.value.reg;
> +    dst.vstride = src.vstride;
> +    dst.width = src.width;
> +    dst.hstride = src.hstride;
> +    dst.quarter = src.quarter;
> +    dst.nr = src.nr;
> +    dst.subnr = src.subnr;
> +    dst.address_mode = src.address_mode;
> +    dst.a0_subnr = src.a0_subnr;
> +    dst.addr_imm = src.addr_imm;
> +
> +    dst.negation = dst.negation ^ src.negation;
> +    dst.absolute = dst.absolute | src.absolute;
> +  }
> +
> +  void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info)
> +  {
> +    for (GenRegister* reg : info->toBeReplacements) {
> +      propagateRegister(*reg, info->replacement);
> +    }
> +    bb.insnList.erase(&(info->insn));
> +  }
> +
> +  void SelBasicBlockOptimizer::cleanReplaceInfos()
> +  {
> +    for (ReplaceInfo* info : replaceInfos) {
> +      doReplacement(info);
> +      delete info;
> +    }
> +    replaceInfos.clear();
> +  }
> +
> +  void SelBasicBlockOptimizer::removeFromReplaceInfos(const GenRegister& var)
> +  {
> +    for (set<ReplaceInfo*>::iterator pos = replaceInfos.begin(); pos != replaceInfos.end(); ) {
> +      ReplaceInfo* info = *pos;
> +      if (info->intermedia.reg() == var.reg()) {
> +        if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr)
> +          doReplacement(info);
> +        replaceInfos.erase(pos++);
> +        delete info;
> +      }
> +      else if (info->replacement.reg() == var.reg()) {
> +        info->replacementChanged = true;
> +        ++pos;
> +      }
> +      else
> +        ++pos;
> +    }
> +  }
> +
> +  void SelBasicBlockOptimizer::addToReplaceInfos(SelectionInstruction& insn)
> +  {
> +    assert(insn.opcode == SEL_OP_MOV);
> +    const GenRegister& src = insn.src(0);
> +    const GenRegister& dst = insn.dst(0);
> +    if (src.type != dst.type || src.file != dst.file)
> +      return;
> +
> +    if (liveout.find(dst.reg()) != liveout.end())
> +      return;
> +
> +    ReplaceInfo* info = new ReplaceInfo(insn, dst, src, insn.state.execWidth);
> +    replaceInfos.insert(info);
> +  }
> +
> +  void SelBasicBlockOptimizer::changeInsideReplaceInfos(const SelectionInstruction& insn, GenRegister& var)
> +  {
> +    for (ReplaceInfo* info : replaceInfos) {
> +      if (info->intermedia.reg() == var.reg()) {
> +        bool replaceable = false;
It's better to add a comment here to describe the cases which can't be replaced and why.

> +        if (insn.opcode != SEL_OP_BSWAP && !insn.isWrite()) {
> +          if (!info->replacementChanged && info->intermedia.type == var.type && info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr) {
> +            uint32_t elements = CalculateElements(var, insn.state.execWidth);
> +            if (info->elements == elements) {
Why not compare each attribute value, such as vstride_size,hstride_size,.....?

The other parts LGTM,

Thanks,
Zhigang Gong
> +              info->toBeReplacements.insert(&var);
> +              replaceable = true;
> +            }
> +          }
> +        }
> +
> +        //if we found the same register, but could not be replaced for some reason,
> +        //that means we could not remove MOV instruction, and so no replacement,
> +        //so we'll remove the info for this case.
> +        if (!replaceable) {
> +          replaceInfos.erase(info);
> +          delete info;
> +        }
> +        break;
> +      }
> +    }
> +  }
> +
> +  void SelBasicBlockOptimizer::doLocalCopyPropagation()
> +  {
> +    for (SelectionInstruction &insn : bb.insnList) {
> +      for (uint8_t i = 0; i < insn.srcNum; ++i)
> +        changeInsideReplaceInfos(insn, insn.src(i));
> +
> +      for (uint8_t i = 0; i < insn.dstNum; ++i)
> +        removeFromReplaceInfos(insn.dst(i));
> +
> +      if (insn.opcode == SEL_OP_MOV)
> +        addToReplaceInfos(insn);
> +    }
> +    cleanReplaceInfos();
> +  }
> +
>    void SelBasicBlockOptimizer::run()
>    {
> +    for (size_t i = 0; i < MaxTries; ++i) {
> +      optimized = false;
>  
> +      doLocalCopyPropagation();
> +      //doOtherLocalOptimization();
> +
> +      if (!optimized)
> +        break;      //break since no optimization found at this round
> +    }
>    }
>  
>    class SelGlobalOptimizer : public SelOptimizer
>    {
>    public:
> -    SelGlobalOptimizer(uint32_t features) : SelOptimizer(features) {}
> +    SelGlobalOptimizer(const GenContext& ctx, uint32_t features) : SelOptimizer(ctx, features) {}
>      ~SelGlobalOptimizer() {}
>      virtual void run();
>    };
> @@ -57,7 +248,7 @@ namespace gbe
>    {
>      //do basic block level optimization
>      for (SelectionBlock &block : *blockList) {
> -      SelBasicBlockOptimizer bbopt(opt_features, block);
> +      SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
>        bbopt.run();
>      }
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list