[Beignet] [PATCH V4] backend: add global immediate optimization

Wang, Rander rander.wang at intel.com
Fri Jun 30 01:46:20 UTC 2017


Hi,

	The abs of UD has to be done if it is encoded in instruction no matter it make sense or not.
    And I have discussed with my collage and refine it.
    First we inspect the HW behavior of ABS(UD), -(UD) and find that ABS(UD) = UD,
    -(UD) = the result of -(UD) on CPU.

	So the abs calculation can be removed and this will make it compiled pass.

Rander 

-----Original Message-----
From: Ivan Shapovalov [mailto:intelfx at intelfx.name] 
Sent: Wednesday, June 28, 2017 8:54 AM
To: Wang, Rander <rander.wang at intel.com>; beignet at lists.freedesktop.org
Cc: Song, Ruiling <ruiling.song at intel.com>
Subject: Re: [Beignet] [PATCH V4] backend: add global immediate optimization

On 2017-06-14 at 13:55 +0800, rander.wanga wrote:
>         there are some global immediates in global var list of LLVM.
>         these imm can be integrated in instructions. for 
> compiler_global_immediate_optimized test
>         in utest, there are two global immediates:
>         L0:
>             MOV(1)              %42<0>:UD       :       0x0:UD
>             MOV(1)              %43<0>:UD       :       0x30:UD
> 
>             used by:
>             ADD(16)             %49<1>:D        :       %42<0,1,0>:D 
>    %48<8,8,1>:D
>             ADD(16)             %54<1>:D        :       %43<0,1,0>:D 
>    %53<8,8,1>:D
> 
>         it can be
>             ADD(16)             %49<1>:D    :       %48<8,8,1>:D   0x
> 0:UD
>             ADD(16)             %54<1>:D    :       %53<8,8,1>:D   0x
> 30:UD
> 
> 	Then the MOV can be removed. And after this optimization, ADD 0 can 
> be change
> 	to MOV, then local copy propagation can be done.
> 
> 	V2: (1) add environment variable to enable/disable the optimization
> 	    (2) refine the architecture of imm optimization, inherit from 
> global
>                 optimizer not local block optimizer
> 
> 	V3: merge with latest master driver
> 
> 	V4: (1)refine some type errors
> 	    (2)remove UD/D check for no need
> 	    (3)refine imm calculate for UD/D
> 
> Signed-off-by: rander.wang <rander.wang at intel.com>
> ---
>  .../src/backend/gen_insn_selection_optimize.cpp    | 367
> +++++++++++++++++++--
>  1 file changed, 342 insertions(+), 25 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp
> b/backend/src/backend/gen_insn_selection_optimize.cpp
> index 07547ec..eb93a20 100644
> --- a/backend/src/backend/gen_insn_selection_optimize.cpp
> +++ b/backend/src/backend/gen_insn_selection_optimize.cpp
> @@ -40,6 +40,33 @@ namespace gbe
>      return elements;
>    }
>  
> +  class ReplaceInfo
> +  {
> +  public:
> +    ReplaceInfo(SelectionInstruction &insn,
> +                const GenRegister &intermedia,
> +                const GenRegister &replacement) : insn(insn),
> intermedia(intermedia), replacement(replacement)
> +    {
> +      assert(insn.opcode == SEL_OP_MOV || insn.opcode ==
> SEL_OP_ADD);
> +      assert(&(insn.dst(0)) == &intermedia);
> +      this->elements = CalculateElements(intermedia,
> insn.state.execWidth);
> +      replacementOverwritten = false;
> +    }
> +    ~ReplaceInfo()
> +    {
> +      this->toBeReplaceds.clear();
> +    }
> +
> +    SelectionInstruction &insn;
> +    const GenRegister &intermedia;
> +    uint32_t elements;
> +    const GenRegister &replacement;
> +    set<GenRegister *> toBeReplaceds;
> +    set<SelectionInstruction*> toBeReplacedInsns;
> +    bool replacementOverwritten;
> +    GBE_CLASS(ReplaceInfo);
> +  };
> +
>    class SelOptimizer
>    {
>    public:
> @@ -66,32 +93,7 @@ namespace gbe
>  
>    private:
>      // local copy propagation
> -    class ReplaceInfo
> -    {
> -    public:
> -      ReplaceInfo(SelectionInstruction& insn,
> -                  const GenRegister& intermedia,
> -                  const GenRegister& replacement) :
> -                  insn(insn), intermedia(intermedia),
> replacement(replacement)
> -      {
> -        assert(insn.opcode == SEL_OP_MOV || insn.opcode ==
> SEL_OP_ADD);
> -        assert(&(insn.dst(0)) == &intermedia);
> -        this->elements = CalculateElements(intermedia,
> insn.state.execWidth);
> -        replacementOverwritten = false;
> -      }
> -      ~ReplaceInfo()
> -      {
> -        this->toBeReplaceds.clear();
> -      }
>  
> -      SelectionInstruction& insn;
> -      const GenRegister& intermedia;
> -      uint32_t elements;
> -      const GenRegister& replacement;
> -      set<GenRegister*> toBeReplaceds;
> -      bool replacementOverwritten;
> -      GBE_CLASS(ReplaceInfo);
> -    };
>      typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
>      ReplaceInfoMap replaceInfoMap;
>      void doLocalCopyPropagation();
> @@ -298,13 +300,328 @@ namespace gbe
>      virtual void run();
>    };
>  
> +  class SelGlobalImmMovOpt : public SelGlobalOptimizer  {
> +  public:
> +    SelGlobalImmMovOpt(const GenContext& ctx, uint32_t features,
> intrusive_list<SelectionBlock> *blockList) :
> +      SelGlobalOptimizer(ctx, features)
> +      {
> +        mblockList = blockList;
> +      }
> +
> +    virtual void run();
> +
> +    void addToReplaceInfoMap(SelectionInstruction& insn);
> +    void doGlobalCopyPropagation();
> +    bool CanBeReplaced(const ReplaceInfo* info,
> SelectionInstruction& insn, const GenRegister& var);
> +    void cleanReplaceInfoMap();
> +    void doReplacement(ReplaceInfo* info);
> +
> +  private:
> +    intrusive_list<SelectionBlock> *mblockList;
> +
> +    typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
> +    ReplaceInfoMap replaceInfoMap;
> +
> +  };
> +
> +  extern void outputSelectionInst(SelectionInstruction &insn);
> +
> +  void SelGlobalImmMovOpt::cleanReplaceInfoMap()
> +  {
> +    for (auto& pair : replaceInfoMap) {
> +      ReplaceInfo* info = pair.second;
> +      doReplacement(info);
> +      delete info;
> +    }
> +    replaceInfoMap.clear();
> +  }
> +
> +#define RESULT() switch(insn->opcode) \
> +            { \
> +              case SEL_OP_ADD: \
> +                result = s0 + s1; \
> +                break; \
> +              case SEL_OP_MUL: \
> +                result = s0 * s1; \
> +                break; \
> +              case SEL_OP_AND: \
> +                result = s0 & s1; \
> +                break; \
> +              case SEL_OP_OR: \
> +                result = s0 | s1; \
> +                break; \
> +              case SEL_OP_XOR: \
> +                result = s0 ^ s1; \
> +                break; \
> +              default: \
> +                assert(0); \
> +                break; \
> +            }
> +
> +  void SelGlobalImmMovOpt::doReplacement(ReplaceInfo* info)  {
> +    for (GenRegister* reg : info->toBeReplaceds) {
> +      GenRegister::propagateRegister(*reg, info->replacement);
> +    }
> +
> +    //after imm opt, maybe both src are imm, convert it to mov
> +    for (SelectionInstruction* insn : info->toBeReplacedInsns) {
> +      GenRegister& src0 = insn->src(0);
> +      GenRegister& src1 = insn->src(1);
> +      if (src0.file == GEN_IMMEDIATE_VALUE)
> +      {
> +        if (src1.file == GEN_IMMEDIATE_VALUE)
> +        {
> +          if (src0.type == GEN_TYPE_F)
> +          {
> +            float s0 = src0.value.f;
> +            if (src0.absolute)
> +              s0 = fabs(s0);
> +            if (src0.negation)
> +              s0 = -s0;
> +
> +            float s1 = src1.value.f;
> +            if (src1.absolute)
> +              s1 = fabs(s1);
> +            if (src1.negation)
> +              s1 = -s1;
> +
> +            float result;
> +            switch(insn->opcode)
> +            {
> +              case SEL_OP_ADD:
> +                result = s0 + s1;
> +                break;
> +              case SEL_OP_MUL:
> +                result = s0 * s1;
> +                break;
> +              default:
> +                assert(0);
> +                break;
> +            }
> +
> +            insn->src(0) = GenRegister::immf(result);
> +          }
> +          else if (src0.type == GEN_TYPE_D && src1.type ==
> GEN_TYPE_D)
> +          {
> +            int s0 = src0.value.d;
> +            if (src0.absolute)
> +              s0 = abs(s0);
> +            if (src0.negation)
> +              s0 = -s0;
> +
> +            int s1 = src1.value.d;
> +            if (src1.absolute)
> +              s1 = abs(s1);
> +            if (src1.negation)
> +              s1 = -s1;
> +
> +            int result;
> +            RESULT();
> +            insn->src(0) = GenRegister::immd(result);
> +            }
> +            else if(src0.type == GEN_TYPE_UD || src1.type ==
> GEN_TYPE_UD)
> +            {
> +              unsigned int s0 = src0.value.ud;
> +              if (src0.absolute)
> +                s0 = abs(s0);
> +              if (src0.negation)
> +                s0 = -s0;

Hi,

are you sure this is OK? This piece of code (and the similar one directly below) doesn't compile here:

/tmp/makepkg/build/beignet-git/src/beignet/backend/src/backend/gen_insn_selection_optimize.cpp: In member function 'void gbe::SelGlobalImmMovOpt::doReplacement(gbe::ReplaceInfo*)':                                                           
/tmp/makepkg/build/beignet-git/src/beignet/backend/src/backend/gen_insn_selection_optimize.cpp:462:28: error: call of overloaded 'abs(unsigned int&)' is ambiguous                                                                             
                 s0 = abs(s0);

I'm not familiar with the ISA that you work with, but the problem seems real (not a false positive), since either of your operands may not be a GEN_TYPE_UD.

Even if you work around the immediate problem by, say, reinterpret_casting to int& and back, the computation inside RESULT() will still yield nonsense if it's multiplication or division and either of the operands is a negative signed int.

(Please include me in replies as I'm not subscribed to the list.)

--
Ivan Shapovalov / intelfx /

> +
> +              unsigned int s1 = src1.value.ud;
> +              if (src1.absolute)
> +                s1 = abs(s1);
> +              if (src1.negation)
> +                s1 = -s1;
> +
> +              unsigned int result;
> +              RESULT();
> +              insn->src(0) = GenRegister::immud(result);
> +            }
> +            else
> +            {
> +              assert(0);
> +            }
> +
> +            insn->opcode = SEL_OP_MOV;
> +            insn->srcNum = 1;
> +        }
> +        else
> +        {
> +          //src0 cant be immediate, so exchange with src1
> +          GenRegister tmp;
> +          tmp = src0;
> +          src0 = src1;
> +          src1 = tmp;
> +        }
> +      }
> +    }
> +
> +    info->insn.parent->insnList.erase(&(info->insn));
> +  }
> +
> +  void SelGlobalImmMovOpt::addToReplaceInfoMap(SelectionInstruction&
> insn)
> +  {
> +    assert(insn.opcode == SEL_OP_MOV);
> +    const GenRegister& src = insn.src(0);
> +    const GenRegister& dst = insn.dst(0);
> +    if (src.type != dst.type)
> +      return;
> +
> +    ReplaceInfo* info = new ReplaceInfo(insn, dst, src);
> +    replaceInfoMap[dst.reg()] = info;  }
> +
> +  bool SelGlobalImmMovOpt::CanBeReplaced(const ReplaceInfo* info,
> SelectionInstruction& insn, const GenRegister& var)
> +  {
> +    if(var.file == GEN_IMMEDIATE_VALUE)
> +      return false;
> +
> +    switch(insn.opcode)
> +    {
> +      // imm source is supported by these instructions. And
> +      // the src operators can be exchange in these instructions
> +      case SEL_OP_ADD:
> +      case SEL_OP_MUL:
> +      case SEL_OP_AND:
> +      case SEL_OP_OR:
> +      case SEL_OP_XOR:
> +        break;
> +      default:
> +        return false;
> +    }
> +
> +    if(info->intermedia.type != var.type)
> +    {
> +        assert(info->replacement.file == GEN_IMMEDIATE_VALUE);
> +        if(!((var.type == GEN_TYPE_D && info->intermedia.type ==
> GEN_TYPE_UD)
> +          || (var.type == GEN_TYPE_UD && info->intermedia.type ==
> GEN_TYPE_D)))
> +        {
> +          return false;
> +        }
> +    }
> +
> +    if (info->intermedia.quarter == var.quarter &&
> +          info->intermedia.subnr == var.subnr &&
> +            info->intermedia.nr == var.nr)
> +    {
> +      uint32_t elements = CalculateElements(var,
> insn.state.execWidth);  //considering width, hstrid, vstrid and 
> execWidth
> +      if (info->elements != elements)
> +        return false;
> +    }
> +
> +#ifdef DEBUG_GLOBAL_IMM_OPT
> +    outputSelectionInst(insn);
> +#endif
> +
> +    return true;
> +  }
> +
> +  void SelGlobalImmMovOpt::doGlobalCopyPropagation()
> +  {
> +    for(SelectionBlock &block : *mblockList)
> +    {
> +      for (SelectionInstruction &insn :block.insnList)
> +      {
> +        for (uint8_t i = 0; i < insn.srcNum; ++i)
> +        {
> +            ReplaceInfoMap::iterator it =
> replaceInfoMap.find(insn.src(i).reg());
> +            if (it != replaceInfoMap.end())
> +            {
> +              ReplaceInfo *info = it->second;
> +              if (CanBeReplaced(info, insn, insn.src(i)))
> +              {
> +                info->toBeReplaceds.insert(&insn.src(i));
> +                info->toBeReplacedInsns.insert(&insn);
> +              }
> +              else
> +              {
> +                replaceInfoMap.erase(it);
> +                delete info;
> +              }
> +            }
> +        }
> +
> +        for (uint8_t i = 0; i < insn.dstNum; ++i)
> +        {
> +          ReplaceInfoMap::iterator it =
> replaceInfoMap.find(insn.dst(i).reg());
> +          if (it != replaceInfoMap.end())
> +          {
> +            ReplaceInfo *info = it->second;
> +            if(&(info->insn) != &insn)
> +            {
> +              replaceInfoMap.erase(it);
> +              delete info;
> +            }
> +          }
> +        }
> +      }
> +
> +      if(replaceInfoMap.empty())
> +        break;
> +    }
> +
> +    cleanReplaceInfoMap();
> +  }
> +
> +  void SelGlobalImmMovOpt::run()
> +  {
> +    bool canbeOpt = false;
> +
> +    /*global immediates are set in entry block, the following is
> example of GEN IR
> +
> +          decl_input.global %41 dst
> +            ## 0 output register ##
> +            ## 0 pushed register
> +            ## 3 blocks ##
> +          LABEL $0
> +            LOADI.uint32 %42 0
> +            LOADI.uint32 %43 48
> +
> +          LABEL $1
> +            MUL.int32 %44 %3   %12
> +            ADD.int32 %49 %42 %48
> +            ...
> +    */
> +    SelectionBlock &block = *mblockList->begin();
> +    for(SelectionInstruction &insn : block.insnList)
> +    {
> +        GenRegister src0 = insn.src(0);
> +        if(insn.opcode == SEL_OP_MOV &&
> +            src0.file == GEN_IMMEDIATE_VALUE &&
> +            (src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_D ||
> src0.type == GEN_TYPE_F))
> +        {
> +          addToReplaceInfoMap(insn);
> +          canbeOpt = true;
> +
> +#ifdef DEBUG_GLOBAL_IMM_OPT
> +          outputSelectionInst(insn);
> +#endif
> +        }
> +    }
> +
> +    if(!canbeOpt) return;
> +
> +    doGlobalCopyPropagation();
> +  }
> +
>    void SelGlobalOptimizer::run()
>    {
>  
>    }
>  
> +  BVAR(OCL_GLOBAL_IMM_OPTIMIZATION, true);
> +
>    void Selection::optimize()
>    {
> +    //do global imm opt first to make more local opt
> +    if(OCL_GLOBAL_IMM_OPTIMIZATION)
> +    {
> +      SelGlobalImmMovOpt gopt(getCtx(), opt_features, blockList);
> +      gopt.run();
> +    }
> +
>      //do basic block level optimization
>      for (SelectionBlock &block : *blockList) {
>        SelBasicBlockOptimizer bbopt(getCtx(), 
> getCtx().getLiveOut(block.bb), opt_features, block);


More information about the Beignet mailing list