[Beignet] [PATCH v2] add 64-bit version of "rhadd"

Zhigang Gong zhigang.gong at linux.intel.com
Mon Sep 16 22:27:14 PDT 2013


Pushed, thanks.

On Wed, Sep 11, 2013 at 11:21:37AM +0800, Homer Hsing wrote:
> v2:
>   keep the highest carry bit
> 
> Tested with the piglit test cases:
>   piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-rhadd-1.0.generated.cl
>   piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-rhadd-1.0.generated.cl
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_context.cpp                | 30 ++++++++++++++++++++++
>  backend/src/backend/gen_context.hpp                |  1 +
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
>  backend/src/backend/gen_insn_selection.cpp         | 19 ++++++++++++++
>  backend/src/backend/gen_insn_selection.hxx         |  1 +
>  backend/src/ir/instruction.cpp                     |  1 +
>  backend/src/ir/instruction.hpp                     |  2 ++
>  backend/src/ir/instruction.hxx                     |  1 +
>  backend/src/llvm/llvm_gen_backend.cpp              | 11 ++++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx         |  3 ++-
>  backend/src/ocl_stdlib.tmpl.h                      | 15 ++++++++---
>  11 files changed, 80 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 84cc094..20de5ed 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -485,6 +485,36 @@ namespace gbe
>      storeTopHalf(dest, c);
>    }
>  
> +  void GenContext::emitI64RHADDInstruction(const SelectionInstruction &insn) {
> +    GenRegister dest = ra->genReg(insn.dst(0));
> +    GenRegister x = ra->genReg(insn.src(0));
> +    GenRegister y = ra->genReg(insn.src(1));
> +    GenRegister a = ra->genReg(insn.dst(1));
> +    GenRegister b = ra->genReg(insn.dst(2));
> +    GenRegister c = ra->genReg(insn.dst(3));
> +    GenRegister d = ra->genReg(insn.dst(4));
> +    a.type = b.type = c.type = d.type = GEN_TYPE_UD;
> +    loadBottomHalf(a, x);
> +    loadBottomHalf(b, y);
> +    addWithCarry(a, a, b);
> +    p->MOV(c, GenRegister::immud(1));
> +    addWithCarry(a, a, c);
> +    p->ADD(b, b, c);
> +    loadTopHalf(c, x);
> +    loadTopHalf(d, y);
> +    addWithCarry(c, c, b);
> +    addWithCarry(c, c, d);
> +    p->ADD(b, b, d);
> +    p->SHR(a, a, GenRegister::immud(1));
> +    p->SHL(d, c, GenRegister::immud(31));
> +    p->OR(a, a, d);
> +    p->SHR(c, c, GenRegister::immud(1));
> +    p->SHL(d, b, GenRegister::immud(31));
> +    p->OR(c, c, d);
> +    storeBottomHalf(dest, a);
> +    storeTopHalf(dest, c);
> +  }
> +
>    void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) {
>      GenRegister dest = ra->genReg(insn.dst(0));
>      GenRegister x = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 4601242..9c9299d 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -97,6 +97,7 @@ namespace gbe
>      void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
>      void emitTernaryInstruction(const SelectionInstruction &insn);
>      void emitI64HADDInstruction(const SelectionInstruction &insn);
> +    void emitI64RHADDInstruction(const SelectionInstruction &insn);
>      void emitI64ShiftInstruction(const SelectionInstruction &insn);
>      void emitI64CompareInstruction(const SelectionInstruction &insn);
>      void emitCompareInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 445b461..2035741 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -7,6 +7,7 @@ DECL_GEN7_SCHEDULE(BinaryWithTemp,  20,        4,        2)
>  DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(I64Shift,        20,        4,        2)
>  DECL_GEN7_SCHEDULE(I64HADD,         20,        4,        2)
> +DECL_GEN7_SCHEDULE(I64RHADD,        20,        4,        2)
>  DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(I64Compare,      20,        4,        2)
>  DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 49ef601..2d34022 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -472,6 +472,8 @@ namespace gbe
>  #undef I64Shift
>      /*! (x+y)>>1 without mod. overflow */
>      void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
> +    /*! (x+y+1)>>1 without mod. overflow */
> +    void I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
>      /*! Shift a 64-bit integer */
>      void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
>      /*! Compare 64-bit integer */
> @@ -1085,6 +1087,15 @@ namespace gbe
>        insn->dst(i + 1) = tmp[i];
>    }
>  
> +  void Selection::Opaque::I64RHADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64RHADD, 5, 2);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src0;
> +    insn->src(1) = src1;
> +    for(int i = 0; i < 4; i ++)
> +      insn->dst(i + 1) = tmp[i];
> +  }
> +
>    void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
>      SelectionInstruction *insn = this->appendInsn(opcode, 7, 2);
>      insn->dst(0) = dst;
> @@ -1687,6 +1698,14 @@ namespace gbe
>            sel.I64HADD(dst, src0, src1, tmp);
>            break;
>           }
> +        case OP_I64RHADD:
> +         {
> +          GenRegister tmp[4];
> +          for(int i=0; i<4; i++)
> +            tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> +          sel.I64RHADD(dst, src0, src1, tmp);
> +          break;
> +         }
>          case OP_UPSAMPLE_SHORT:
>            sel.UPSAMPLE_SHORT(dst, src0, src1);
>            break;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 0083f7d..86773cd 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -63,6 +63,7 @@ DECL_SELECTION_IR(FBL, UnaryInstruction)
>  DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
> +DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 6bbe37e..1925d93 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1349,6 +1349,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
>    DECL_EMIT_FUNCTION(HADD)
>    DECL_EMIT_FUNCTION(RHADD)
>    DECL_EMIT_FUNCTION(I64HADD)
> +  DECL_EMIT_FUNCTION(I64RHADD)
>  
>  #undef DECL_EMIT_FUNCTION
>  
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index fab6d73..2a06f76 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -535,6 +535,8 @@ namespace ir {
>    Instruction RHADD(Type type, Register dst, Register src0, Register src1);
>    /*! i64hadd.type dst src */
>    Instruction I64HADD(Type type, Register dst, Register src0, Register src1);
> +  /*! i64rhadd.type dst src */
> +  Instruction I64RHADD(Type type, Register dst, Register src0, Register src1);
>    /*! tan.type dst src */
>    Instruction RCP(Type type, Register dst, Register src);
>    /*! abs.type dst src */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 6af0899..7ead344 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -78,6 +78,7 @@ DECL_INSN(FBL, UnaryInstruction)
>  DECL_INSN(HADD, BinaryInstruction)
>  DECL_INSN(RHADD, BinaryInstruction)
>  DECL_INSN(I64HADD, BinaryInstruction)
> +DECL_INSN(I64RHADD, BinaryInstruction)
>  DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
>  DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
>  DECL_INSN(UPSAMPLE_LONG, BinaryInstruction)
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 3c04565..6018d7d 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1842,6 +1842,7 @@ namespace gbe
>        case GEN_OCL_HADD:
>        case GEN_OCL_RHADD:
>        case GEN_OCL_I64HADD:
> +      case GEN_OCL_I64RHADD:
>          this->newRegister(&I);
>          break;
>        default:
> @@ -2296,6 +2297,16 @@ namespace gbe
>              ctx.RHADD(getUnsignedType(ctx, I.getType()), dst, src0, src1);
>              break;
>            }
> +          case GEN_OCL_I64RHADD:
> +           {
> +            GBE_ASSERT(AI != AE);
> +            const ir::Register src0 = this->getRegister(*(AI++));
> +            GBE_ASSERT(AI != AE);
> +            const ir::Register src1 = this->getRegister(*(AI++));
> +            const ir::Register dst = this->getRegister(&I);
> +            ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1);
> +            break;
> +           }
>            default: break;
>          }
>        }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 13d8f66..58df2b0 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -132,8 +132,9 @@ DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
>  DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
>  DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
>  DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj)
> -DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
> +DECL_LLVM_GEN_FUNCTION(RHADD, _Z15__gen_ocl_rhaddjj)
>  DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm)
> +DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm)
>  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
>  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
>  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index e87fea5..eadcc00 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -463,7 +463,7 @@ INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) {
>  }
>  
>  OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y);
> -PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
> +OVERLOADABLE uint __gen_ocl_rhadd(uint x, uint y);
>  #define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
>  #define DEF(type) INLINE_OVERLOADABLE type hadd(type x, type y) { return (x + y) >> 1; }
>  DEC
> @@ -478,9 +478,14 @@ INLINE_OVERLOADABLE int hadd(int x, int y) {
>           __gen_ocl_hadd((uint)x, (uint)y);
>  }
>  INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); }
> -INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); }
> +INLINE_OVERLOADABLE int rhadd(int x, int y) {
> +  return (x < 0 && y > 0) || (x > 0 && y < 0) ?
> +         ((x + y + 1) >> 1) :
> +         __gen_ocl_rhadd((uint)x, (uint)y);
> + }
>  INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, y); }
>  OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y);
> +OVERLOADABLE ulong __gen_ocl_rhadd(ulong x, ulong y);
>  INLINE_OVERLOADABLE long hadd(long x, long y) {
>    return (x < 0 && y > 0) || (x > 0 && y < 0) ?
>           ((x + y) >> 1) :
> @@ -490,10 +495,12 @@ INLINE_OVERLOADABLE ulong hadd(ulong x, ulong y) {
>    return __gen_ocl_hadd(x, y);
>  }
>  INLINE_OVERLOADABLE long rhadd(long x, long y) {
> -  return 0;
> +  return (x < 0 && y > 0) || (x > 0 && y < 0) ?
> +         ((x + y + 1) >> 1) :
> +         __gen_ocl_rhadd((ulong)x, (ulong)y);
>  }
>  INLINE_OVERLOADABLE ulong rhadd(ulong x, ulong y) {
> -  return 0;
> +  return __gen_ocl_rhadd(x, y);
>  }
>  
>  int __gen_ocl_abs(int x);
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list