[Beignet] [PATCH 1/2] Add convert between fp16 and fp32.

Tue Nov 26 20:27:26 PST 2013

LGTM, will push later. Thanks.

On Fri, Nov 22, 2013 at 07:51:56PM +0800, Yang Rong wrote:
> Use convert instruction in ir, and ALU1 in gen selection.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/backend/gen/gen_mesa_disasm.c  |  2 ++
>  backend/src/backend/gen_context.cpp        |  2 ++
>  backend/src/backend/gen_defs.hpp           |  2 ++
>  backend/src/backend/gen_encoder.cpp        |  2 ++
>  backend/src/backend/gen_encoder.hpp        |  2 ++
>  backend/src/backend/gen_insn_selection.cpp | 16 +++++++++++++---
>  backend/src/backend/gen_insn_selection.hxx |  2 ++
>  backend/src/ir/instruction.cpp             | 22 ++++++++++++++++------
>  backend/src/ir/instruction.hpp             |  4 ++++
>  backend/src/ir/instruction.hxx             |  2 ++
>  backend/src/llvm/llvm_gen_backend.cpp      |  8 ++++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  3 +++
>  12 files changed, 58 insertions(+), 9 deletions(-)
> 
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index f911e7c..1f5adc9 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -65,6 +65,8 @@ static const struct {
>    [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
>    [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
>    [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
> +  [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
> +  [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
>  
>    [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 6007904..4902085 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -158,6 +158,8 @@ namespace gbe
>        case SEL_OP_RNDU: p->RNDU(dst, src); break;
>        case SEL_OP_RNDE: p->RNDE(dst, src); break;
>        case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
> +      case SEL_OP_F16TO32: p->F16TO32(dst, src); break;
> +      case SEL_OP_F32TO16: p->F32TO16(dst, src); break;
>        case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); break;
>        case SEL_OP_CONVI64_TO_I:
>         {
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index 27ce58c..ffa38c0 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -125,6 +125,8 @@ enum opcode {
>    GEN_OPCODE_ASR = 12,
>    GEN_OPCODE_CMP = 16,
>    GEN_OPCODE_CMPN = 17,
> +  GEN_OPCODE_F32TO16 = 19,
> +  GEN_OPCODE_F16TO32 = 20,
>    GEN_OPCODE_JMPI = 32,
>    GEN_OPCODE_IF = 34,
>    GEN_OPCODE_IFF = 35,
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index b0cc931..c372e36 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -940,6 +940,8 @@ namespace gbe
>    ALU1(RNDU)
>    ALU1(FBH)
>    ALU1(FBL)
> +  ALU1(F16TO32)
> +  ALU1(F32TO16)
>    ALU2(SEL)
>    ALU1(NOT)
>    ALU2(AND)
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index d518c4a..13db6ae 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -99,6 +99,8 @@ namespace gbe
>      ALU1(RNDE)
>      ALU1(RNDD)
>      ALU1(RNDU)
> +    ALU1(F16TO32)
> +    ALU1(F32TO16)
>      ALU2(SEL)
>      ALU1(NOT)
>      ALU2(AND)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index eef7847..3661c2b 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -426,6 +426,8 @@ namespace gbe
>      ALU1(LOAD_INT64_IMM)
>      ALU1(RNDZ)
>      ALU1(RNDE)
> +    ALU1(F16TO32)
> +    ALU1(F32TO16)
>      ALU2(SEL)
>      ALU2(SEL_INT64)
>      ALU1(NOT)
> @@ -2643,14 +2645,22 @@ namespace gbe
>        const RegisterFamily srcFamily = getFamily(srcType);
>        const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
>        const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
> +      const Opcode opcode = insn.getOpcode();
>  
> -      if(insn.getOpcode() == ir::OP_SAT_CVT) {
> +      if(opcode == ir::OP_SAT_CVT) {
>          sel.push();
>          sel.curr.saturate = 1;
>        }
>  
>        // We need two instructions to make the conversion
> -      if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
> +      if (opcode == OP_F16TO32) {
> +        sel.F16TO32(dst, src);
> +      } else if (opcode == OP_F32TO16) {
> +        GenRegister unpacked;
> +        unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD));
> +        sel.F32TO16(unpacked, src);
> +        sel.MOV(dst, unpacked);
> +      } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
>          GenRegister unpacked;
>          if (dstFamily == FAMILY_WORD) {
>            const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> @@ -2695,7 +2705,7 @@ namespace gbe
>        } else
>          sel.MOV(dst, src);
>  
> -      if(insn.getOpcode() == ir::OP_SAT_CVT)
> +      if(opcode == ir::OP_SAT_CVT)
>          sel.pop();
>  
>        return true;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 4499006..9fb5da7 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -10,6 +10,8 @@ DECL_SELECTION_IR(RNDE, UnaryInstruction)
>  DECL_SELECTION_IR(RNDD, UnaryInstruction)
>  DECL_SELECTION_IR(RNDU, UnaryInstruction)
>  DECL_SELECTION_IR(FRC, UnaryInstruction)
> +DECL_SELECTION_IR(F16TO32, UnaryInstruction)
> +DECL_SELECTION_IR(F32TO16, UnaryInstruction)
>  DECL_SELECTION_IR(SEL, BinaryInstruction)
>  DECL_SELECTION_IR(SEL_INT64, BinaryInstruction)
>  DECL_SELECTION_IR(AND, BinaryInstruction)
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index c624d7a..c0cf88a 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -283,13 +283,13 @@ namespace ir {
>        public NSrcPolicy<ConvertInstruction, 1>
>      {
>      public:
> -      ConvertInstruction(Type dstType,
> +      ConvertInstruction(Opcode opcode,
> +                         Type dstType,
>                           Type srcType,
>                           Register dst,
> -                         Register src,
> -                         bool saturated=false)
> +                         Register src)
>        {
> -        this->opcode = saturated ? OP_SAT_CVT : OP_CVT;
> +        this->opcode = opcode;
>          this->dst[0] = dst;
>          this->src[0] = src;
>          this->dstType = dstType;
> @@ -1563,12 +1563,22 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
>  
>    // CVT
>    Instruction CVT(Type dstType, Type srcType, Register dst, Register src) {
> -    return internal::ConvertInstruction(dstType, srcType, dst, src).convert();
> +    return internal::ConvertInstruction(OP_CVT, dstType, srcType, dst, src).convert();
>    }
>  
>    // saturated convert
>    Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) {
> -    return internal::ConvertInstruction(dstType, srcType, dst, src, true).convert();
> +    return internal::ConvertInstruction(OP_SAT_CVT, dstType, srcType, dst, src).convert();
> +  }
> +
> +  // fp16 to fp32 convert
> +  Instruction F16TO32(Type dstType, Type srcType, Register dst, Register src) {
> +    return internal::ConvertInstruction(OP_F16TO32, dstType, srcType, dst, src).convert();
> +  }
> +
> +  // fp32 to fp16 convert
> +  Instruction F32TO16(Type dstType, Type srcType, Register dst, Register src) {
> +    return internal::ConvertInstruction(OP_F32TO16, dstType, srcType, dst, src).convert();
>    }
>  
>    // For all unary functions with given opcode
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index b7eebc0..46577c7 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -642,6 +642,10 @@ namespace ir {
>    Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
>    /*! sat_cvt.{dstType <- srcType} dst src */
>    Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src);
> +  /*! F16TO32.{dstType <- srcType} dst src */
> +  Instruction F16TO32(Type dstType, Type srcType, Register dst, Register src);
> +  /*! F32TO16.{dstType <- srcType} dst src */
> +  Instruction F32TO16(Type dstType, Type srcType, Register dst, Register src);
>    /*! atomic dst addr.space {src1 {src2}} */
>    Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src);
>    /*! bra labelIndex */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index cf79e09..baaaca2 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -64,6 +64,8 @@ DECL_INSN(ORD, CompareInstruction)
>  DECL_INSN(BITCAST, BitCastInstruction)
>  DECL_INSN(CVT, ConvertInstruction)
>  DECL_INSN(SAT_CVT, ConvertInstruction)
> +DECL_INSN(F16TO32, ConvertInstruction)
> +DECL_INSN(F32TO16, ConvertInstruction)
>  DECL_INSN(ATOMIC, AtomicInstruction)
>  DECL_INSN(BRA, BranchInstruction)
>  DECL_INSN(RET, BranchInstruction)
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 9c85c1a..1c932bd 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2128,6 +2128,8 @@ namespace gbe
>        case GEN_OCL_SAT_CONV_F32_TO_I32:
>        case GEN_OCL_SAT_CONV_I32_TO_U32:
>        case GEN_OCL_SAT_CONV_F32_TO_U32:
> +      case GEN_OCL_CONV_F16_TO_F32:
> +      case GEN_OCL_CONV_F32_TO_F16:
>          this->newRegister(&I);
>          break;
>        default:
> @@ -2689,6 +2691,12 @@ namespace gbe
>              DEF(ir::TYPE_U32, ir::TYPE_S32);
>            case GEN_OCL_SAT_CONV_F32_TO_U32:
>              DEF(ir::TYPE_U32, ir::TYPE_FLOAT);
> +          case GEN_OCL_CONV_F16_TO_F32:
> +            ctx.F16TO32(ir::TYPE_FLOAT, ir::TYPE_U16, getRegister(&I), getRegister(I.getOperand(0)));
> +            break;
> +          case GEN_OCL_CONV_F32_TO_F16:
> +            ctx.F32TO16(ir::TYPE_U16, ir::TYPE_FLOAT, getRegister(&I), getRegister(I.getOperand(0)));
> +            break;
>  #undef DEF
>            default: break;
>          }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 71034ab..4b470e0 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -178,3 +178,6 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf)
>  
>  DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati)
>  DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
> +
> +DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
> +DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
> \ No newline at end of file
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet