[Beignet] [PATCH 1/2] GBE: Add more support of char and short arithmetic

Tue Jun 25 19:49:44 PDT 2013

Thanks for the patch, I have two comments as below:

On Tue, Jun 25, 2013 at 03:38:48PM +0800, Ruiling Song wrote:
> add * / % support of char and short type.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   67 +++++++++++++++++++++++-----
>  backend/src/llvm/llvm_gen_backend.cpp      |    4 +-
>  2 files changed, 57 insertions(+), 14 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 1e5f514..b1c6093 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -1260,30 +1260,73 @@ namespace gbe
>        const Opcode opcode = insn.getOpcode();
>        const Type type = insn.getType();
>        GenRegister dst  = sel.selReg(insn.getDst(0), type);

Could you split the logical for OP_DIV and OP_REM here into
a separate function including the original code to handle
OP_DIV /OP_REM?

I count it roughly. it has more than 60 lines of code and
doesn't have strong relationship with the other code path.

> +      const uint32_t simdWidth = sel.curr.execWidth;
> +      const RegisterFamily family = getFamily(type);
> +
> +      //bytes and shorts must be converted to int for DIV and REM per GEN restriction
> +      if((opcode == OP_DIV || opcode == OP_REM)
> +        && (family == FAMILY_WORD || family == FAMILY_BYTE)) {
> +        GenRegister src0 = sel.selReg(insn.getSrc(0), type);
> +        GenRegister src1 = sel.selReg(insn.getSrc(1), type);
> +        uint32_t function = (opcode == OP_DIV)?
> +                            GEN_MATH_FUNCTION_INT_DIV_QUOTIENT :
> +                            GEN_MATH_FUNCTION_INT_DIV_REMAINDER;
> +        GenRegister tmp0 = src0;
> +        GenRegister tmp1 = src1;
> +        GenRegister tmp2 = dst;
> +        tmp0 = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> +        tmp0 = GenRegister::retype(tmp0, GEN_TYPE_D);
> +        sel.MOV(tmp0, src0);
> +
> +        tmp1 = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> +        tmp1 = GenRegister::retype(tmp1, GEN_TYPE_D);
> +        sel.MOV(tmp1, src1);
> +
> +        tmp2 = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> +        tmp2 = GenRegister::retype(tmp2, GEN_TYPE_D);
> +
> +        sel.MATH(tmp2, function, tmp0, tmp1);
> +        GenRegister unpacked;
> +        if(family == FAMILY_WORD) {
> +          unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD));
> +        } else {
> +          unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
> +        }
> +        unpacked = GenRegister::retype(unpacked, getGenType(type));
> +        sel.MOV(unpacked, tmp2);
> +        sel.MOV(dst, unpacked);
Actually, you may have a chance to save one MOV. Don't create a new temporary
register to do unpacking, just adjust the tmp2's region layout accordingly.
And as you use the dst as tmp2, so you may do it as below:
          if(family == FAMILY_WORD)
            unpacked = GenRegister::unpacked_uw(insn.getDst(0));
          else
            unpacked = GenRegister::unpacked_ub(insn.getDst(0));
          sel.MOV(dst, unpacked);

One thing need to be noted, as the unpacked here and dst are the same register
and with different region layout, It may have problem. If so, you may need to
create a new temporary register rather than reuse the dst as tmp2 directly.
Anyway, you can have a try.

-Zhigang.

>  
> +        markAllChildren(dag);
> +        return true;
> +      }
>        // Immediates not supported
>        if (opcode == OP_DIV || opcode == OP_POW) {
>          GenRegister src0 = sel.selReg(insn.getSrc(0), type);
>          GenRegister src1 = sel.selReg(insn.getSrc(1), type);
>          uint32_t function;
> -        if (type == TYPE_S32 || type == TYPE_U32)
> +        if (type == TYPE_S32 || type == TYPE_U32 ) {
>            function = GEN_MATH_FUNCTION_INT_DIV_QUOTIENT;
> -        else
> +          sel.MATH(dst, function, src0, src1);
> +        } else if(type == TYPE_FLOAT) {
>            function = opcode == OP_DIV ?
>                       GEN_MATH_FUNCTION_FDIV :
>                       GEN_MATH_FUNCTION_POW;
> -        sel.MATH(dst, function, src0, src1);
> +          sel.MATH(dst, function, src0, src1);
> +        } else {
> +          NOT_IMPLEMENTED;
> +        }
>          markAllChildren(dag);
>          return true;
>        }
>        if (opcode == OP_REM) {
>          GenRegister src0 = sel.selReg(insn.getSrc(0), type);
>          GenRegister src1 = sel.selReg(insn.getSrc(1), type);
> -        if (type == TYPE_U32 || type == TYPE_S32) {
> +        if(type == TYPE_S32 || type == TYPE_U32) {
>            sel.MATH(dst, GEN_MATH_FUNCTION_INT_DIV_REMAINDER, src0, src1);
> -          markAllChildren(dag);
> -        } else
> -          NOT_IMPLEMENTED;
> +        } else {
> +          GBE_ASSERTM(0, "Unsupported type in remainder operation!");
> +        }
> +        markAllChildren(dag);
>          return true;
>        }
>  
> @@ -1345,14 +1388,14 @@ namespace gbe
>          case OP_SHR: sel.SHR(dst, src0, src1); break;
>          case OP_ASR: sel.ASR(dst, src0, src1); break;
>          case OP_MUL:
> -          if (type == TYPE_FLOAT || type == TYPE_DOUBLE)
> -            sel.MUL(dst, src0, src1);
> -          else if (type == TYPE_U32 || type == TYPE_S32) {
> +          if (type == TYPE_U32 || type == TYPE_S32) {
>              sel.pop();
>              return false;
>            }
> -          else
> -            NOT_IMPLEMENTED;
> +          else {
> +            GBE_ASSERTM((type != TYPE_S64 && type != TYPE_U64), "64bit integer not supported yet!" );
> +            sel.MUL(dst, src0, src1);
> +          }
>          break;
>          default: NOT_IMPLEMENTED;
>        }
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 5b7754c..b0e8c6c 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1276,10 +1276,10 @@ namespace gbe
>        case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
>        case Instruction::Mul:
>        case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
> -      case Instruction::URem:
> +      case Instruction::URem: ctx.REM(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
>        case Instruction::SRem:
>        case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
> -      case Instruction::UDiv:
> +      case Instruction::UDiv: ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
>        case Instruction::SDiv:
>        case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
>        case Instruction::And:  ctx.AND(type, dst, src0, src1); break;
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet