[Beignet] [PATCH] support 64-bit version "add_sat"

Zhigang Gong zhigang.gong at linux.intel.com
Mon Sep 16 23:55:05 PDT 2013


Two comments :

1. It seems that you use flag 1 directly at code generation stage. This may bring some
conflict with current BB. It's better to do this type of things at instrucation selection
stage and let the allocator to allocate flag register.

2. Is it possible to implement this builtin function at inline c level? IMO, for 64 bit support
performance is not our target for this platform, if we can implement those builtin functions
at c level then we can reduce much maintenance effort. And if you can leave the branching code
at c level, and only leave unbranching part to the backend, then you can avoid use flag directly
at backend.

Any thoughts?

On Fri, Sep 13, 2013 at 03:57:41PM +0800, Homer Hsing wrote:
> tested by piglit:
>   piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-add_sat-1.0.generated.cl
>   piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-add_sat-1.0.generated.cl
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_context.cpp                | 53 ++++++++++++++++++++++
>  backend/src/backend/gen_context.hpp                |  1 +
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
>  backend/src/backend/gen_insn_selection.cpp         | 20 ++++++++
>  backend/src/backend/gen_insn_selection.hxx         |  1 +
>  backend/src/ocl_stdlib.tmpl.h                      | 13 +++++-
>  6 files changed, 87 insertions(+), 2 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index fdb305b..b88b6b4 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -945,6 +945,59 @@ namespace gbe
>      p->pop();
>    }
>  
> +  void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) {
> +    GenRegister x = ra->genReg(insn.src(0));
> +    GenRegister y = ra->genReg(insn.src(1));
> +    GenRegister dst = ra->genReg(insn.dst(0));
> +    GenRegister a = ra->genReg(insn.dst(1));
> +    GenRegister b = ra->genReg(insn.dst(2));
> +    GenRegister c = ra->genReg(insn.dst(3));
> +    GenRegister d = ra->genReg(insn.dst(4));
> +    GenRegister e = ra->genReg(insn.dst(5));
> +    loadTopHalf(a, x);
> +    loadBottomHalf(b, x);
> +    loadTopHalf(c, y);
> +    loadBottomHalf(d, y);
> +    switch(insn.opcode) {
> +      case SEL_OP_I64SATADD:
> +        if(dst.is_signed_int())
> +          p->SHR(e, a, GenRegister::immud(31));
> +        addWithCarry(b, b, d);
> +        addWithCarry(a, a, d);
> +        addWithCarry(a, a, c);
> +        p->ADD(c, c, d);
> +        p->push();
> +        p->curr.predicate = GEN_PREDICATE_NONE;
> +        p->curr.physicalFlag = 1;
> +        p->curr.flag = 1;
> +        p->curr.subFlag = 0;
> +        if(! dst.is_signed_int()) {
> +          p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0));
> +          p->curr.predicate = GEN_PREDICATE_NORMAL;
> +          p->MOV(a, GenRegister::immud(0xFFFFFFFFu));
> +          p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
> +        } else {
> +          p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1));
> +          p->curr.predicate = GEN_PREDICATE_NORMAL;
> +          p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u));
> +          p->MOV(a, GenRegister::immud(0x80000000u));
> +          p->MOV(b, GenRegister::immud(0));
> +          p->curr.predicate = GEN_PREDICATE_NONE;
> +          p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0));
> +          p->curr.predicate = GEN_PREDICATE_NORMAL;
> +          p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u));
> +          p->MOV(a, GenRegister::immud(0x7FFFFFFFu));
> +          p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
> +        }
> +        p->pop();
> +        break;
> +      default:
> +        NOT_IMPLEMENTED;
> +    }
> +    storeTopHalf(dst, a);
> +    storeBottomHalf(dst, b);
> +  }
> +
>    void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
>      int execWidth = p->curr.execWidth;
>      src = src.top_half();
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 23da406..e7a9d3c 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -106,6 +106,7 @@ namespace gbe
>      void emitI64RHADDInstruction(const SelectionInstruction &insn);
>      void emitI64ShiftInstruction(const SelectionInstruction &insn);
>      void emitI64CompareInstruction(const SelectionInstruction &insn);
> +    void emitI64SATADDInstruction(const SelectionInstruction &insn);
>      void emitI64ToFloatInstruction(const SelectionInstruction &insn);
>      void emitCompareInstruction(const SelectionInstruction &insn);
>      void emitJumpInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 0a12a91..98db756 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -35,3 +35,4 @@ DECL_GEN7_SCHEDULE(UnSpillReg,      80,        1,        1)
>  DECL_GEN7_SCHEDULE(GetImageInfo,    20,        4,        2)
>  DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
>  DECL_GEN7_SCHEDULE(I64MUL,          20,        4,        2)
> +DECL_GEN7_SCHEDULE(I64SATADD,       20,        4,        2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index a38d8c4..55720ff 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -483,6 +483,8 @@ namespace gbe
>      void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
>      /*! Compare 64-bit integer */
>      void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]);
> +    /*! Saturated addition of 64-bit integer */
> +    void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]);
>      /*! Encode a barrier instruction */
>      void BARRIER(GenRegister src);
>      /*! Encode a barrier instruction */
> @@ -1083,6 +1085,15 @@ namespace gbe
>      insn->extra.function = conditional;
>    }
>  
> +  void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 6, 2);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src0;
> +    insn->src(1) = src1;
> +    for(int i=0; i<5; i++)
> +      insn->dst(i + 1) = tmp[i];
> +  }
> +
>    void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) {
>      SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1);
>      insn->dst(0) = dst;
> @@ -1632,6 +1643,15 @@ namespace gbe
>              sel.ADD(dst, src0, src1);
>            break;
>          case OP_ADDSAT:
> +          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> +            GenRegister tmp[5];
> +            for(int i=0; i<5; i++) {
> +              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> +              tmp[i].type = GEN_TYPE_UD;
> +            }
> +            sel.I64SATADD(dst, src0, src1, tmp);
> +            break;
> +          }
>            sel.push();
>              sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
>              sel.ADD(dst, src0, src1);
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 63ad810..9814cc0 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -28,6 +28,7 @@ DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
>  DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
>  DECL_SELECTION_IR(ADD, BinaryInstruction)
>  DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
> +DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction)
>  DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(MUL, BinaryInstruction)
>  DECL_SELECTION_IR(I64MUL, I64MULInstruction)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 692440f..0d63777 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -182,7 +182,6 @@ INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); }
>  INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
>  SDEF(char);
>  SDEF(short);
> -SDEF(long);
>  #undef SDEF
>  OVERLOADABLE int ocl_sadd_sat(int x, int y);
>  INLINE_OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); }
> @@ -190,6 +189,17 @@ OVERLOADABLE int ocl_ssub_sat(int x, int y);
>  INLINE_OVERLOADABLE int sub_sat(int x, int y) {
>    return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y);
>  }
> +OVERLOADABLE long ocl_sadd_sat(long x, long y);
> +INLINE_OVERLOADABLE long add_sat(long x, long y) {
> +  union {long l; uint i[2];} ux, uy;
> +  ux.l = x;
> +  uy.l = y;
> +  if((ux.i[1] ^ uy.i[1]) & 0x80000000u)
> +    return x + y;
> +  return ocl_sadd_sat(x, y);
> +}
> +OVERLOADABLE long ocl_ssub_sat(long x, long y);
> +INLINE_OVERLOADABLE long sub_sat(long x, long y) { return ocl_ssub_sat(x, y); }
>  #define UDEF(TYPE)                                                              \
>  OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y);                          \
>  OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y);                          \
> @@ -201,7 +211,6 @@ UDEF(uint);
>  UDEF(ulong);
>  #undef UDEF
>  
> -
>  uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) {
>      return add_sat((uchar)x, (uchar)0);
>  }
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list