[Beignet] [PATCH] saturated conversion of native GPU data type, larger to narrower

Zhigang Gong zhigang.gong at linux.intel.com
Wed Oct 9 22:39:12 PDT 2013


Pushed, thanks.

On Wed, Oct 09, 2013 at 03:48:46PM +0800, Homer Hsing wrote:
> This patch supports saturated conversion of
> native GPU data type (char/short/int/float),
> from a larger-range data type to a narrower-range data type.
> For instance,  convert_uchar_sat(int)
> 
> Several test cases are in this patch.
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp | 13 +++++-
>  backend/src/ir/instruction.cpp             | 10 ++++-
>  backend/src/ir/instruction.hpp             |  2 +
>  backend/src/ir/instruction.hxx             |  1 +
>  backend/src/llvm/llvm_gen_backend.cpp      | 69 ++++++++++++++++++++++++++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx | 28 ++++++++++++
>  backend/src/ocl_stdlib.tmpl.h              | 28 ++++++++++--
>  kernels/builtin_convert_sat.cl             | 30 +++++++++++++
>  utests/CMakeLists.txt                      |  1 +
>  utests/builtin_convert_sat.cpp             | 69 ++++++++++++++++++++++++++++++
>  10 files changed, 244 insertions(+), 7 deletions(-)
>  create mode 100644 kernels/builtin_convert_sat.cl
>  create mode 100644 utests/builtin_convert_sat.cpp
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index bd52885..cddd76e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2538,15 +2538,20 @@ namespace gbe
>        const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
>        const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
>  
> +      if(insn.getOpcode() == ir::OP_SAT_CVT) {
> +        sel.push();
> +        sel.curr.saturate = 1;
> +      }
> +
>        // We need two instructions to make the conversion
>        if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
>          GenRegister unpacked;
>          if (dstFamily == FAMILY_WORD) {
> -          const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
> +          const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
>            unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD));
>            unpacked = GenRegister::retype(unpacked, type);
>          } else {
> -          const uint32_t type = TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
> +          const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
>            unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
>            unpacked = GenRegister::retype(unpacked, type);
>          }
> @@ -2581,6 +2586,10 @@ namespace gbe
>          }
>        } else
>          sel.MOV(dst, src);
> +
> +      if(insn.getOpcode() == ir::OP_SAT_CVT)
> +        sel.pop();
> +
>        return true;
>      }
>      DECL_CTOR(ConvertInstruction, 1, 1);
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index a973082..9e5b6f6 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -252,9 +252,10 @@ namespace ir {
>        ConvertInstruction(Type dstType,
>                           Type srcType,
>                           Register dst,
> -                         Register src)
> +                         Register src,
> +                         bool saturated=false)
>        {
> -        this->opcode = OP_CVT;
> +        this->opcode = saturated ? OP_SAT_CVT : OP_CVT;
>          this->dst[0] = dst;
>          this->src[0] = src;
>          this->dstType = dstType;
> @@ -1469,6 +1470,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
>      return internal::ConvertInstruction(dstType, srcType, dst, src).convert();
>    }
>  
> +  // saturated convert
> +  Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) {
> +    return internal::ConvertInstruction(dstType, srcType, dst, src, true).convert();
> +  }
> +
>    // For all unary functions with given opcode
>    Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) {
>      return internal::AtomicInstruction(atomicOp, dst, space, src).convert();
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 27a34d1..90c819b 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -623,6 +623,8 @@ namespace ir {
>    Instruction GT(Type type, Register dst, Register src0, Register src1);
>    /*! cvt.{dstType <- srcType} dst src */
>    Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
> +  /*! sat_cvt.{dstType <- srcType} dst src */
> +  Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src);
>    /*! atomic dst addr.space {src1 {src2}} */
>    Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src);
>    /*! bra labelIndex */
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 1a9f867..cd60349 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -61,6 +61,7 @@ DECL_INSN(LT, CompareInstruction)
>  DECL_INSN(GE, CompareInstruction)
>  DECL_INSN(GT, CompareInstruction)
>  DECL_INSN(CVT, ConvertInstruction)
> +DECL_INSN(SAT_CVT, ConvertInstruction)
>  DECL_INSN(ATOMIC, AtomicInstruction)
>  DECL_INSN(BRA, BranchInstruction)
>  DECL_INSN(RET, BranchInstruction)
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 3fe6085..493d152 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1910,6 +1910,28 @@ namespace gbe
>        case GEN_OCL_I64RHADD:
>        case GEN_OCL_I64_MAD_SAT:
>        case GEN_OCL_I64_MAD_SATU:
> +      case GEN_OCL_SAT_CONV_U8_TO_I8:
> +      case GEN_OCL_SAT_CONV_I16_TO_I8:
> +      case GEN_OCL_SAT_CONV_U16_TO_I8:
> +      case GEN_OCL_SAT_CONV_I32_TO_I8:
> +      case GEN_OCL_SAT_CONV_U32_TO_I8:
> +      case GEN_OCL_SAT_CONV_F32_TO_I8:
> +      case GEN_OCL_SAT_CONV_I8_TO_U8:
> +      case GEN_OCL_SAT_CONV_I16_TO_U8:
> +      case GEN_OCL_SAT_CONV_U16_TO_U8:
> +      case GEN_OCL_SAT_CONV_I32_TO_U8:
> +      case GEN_OCL_SAT_CONV_U32_TO_U8:
> +      case GEN_OCL_SAT_CONV_F32_TO_U8:
> +      case GEN_OCL_SAT_CONV_U16_TO_I16:
> +      case GEN_OCL_SAT_CONV_I32_TO_I16:
> +      case GEN_OCL_SAT_CONV_U32_TO_I16:
> +      case GEN_OCL_SAT_CONV_F32_TO_I16:
> +      case GEN_OCL_SAT_CONV_I16_TO_U16:
> +      case GEN_OCL_SAT_CONV_I32_TO_U16:
> +      case GEN_OCL_SAT_CONV_U32_TO_U16:
> +      case GEN_OCL_SAT_CONV_F32_TO_U16:
> +      case GEN_OCL_SAT_CONV_F32_TO_I32:
> +      case GEN_OCL_SAT_CONV_F32_TO_U32:
>          this->newRegister(&I);
>          break;
>        default:
> @@ -2415,6 +2437,53 @@ namespace gbe
>              ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1);
>              break;
>             }
> +#define DEF(DST_TYPE, SRC_TYPE) \
> +  { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), getRegister(I.getOperand(0))); break; }
> +          case GEN_OCL_SAT_CONV_U8_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_U8);
> +          case GEN_OCL_SAT_CONV_I16_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_S16);
> +          case GEN_OCL_SAT_CONV_U16_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_U16);
> +          case GEN_OCL_SAT_CONV_I32_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_S32);
> +          case GEN_OCL_SAT_CONV_U32_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_U32);
> +          case GEN_OCL_SAT_CONV_F32_TO_I8:
> +            DEF(ir::TYPE_S8, ir::TYPE_FLOAT);
> +          case GEN_OCL_SAT_CONV_I8_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_S8);
> +          case GEN_OCL_SAT_CONV_I16_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_S16);
> +          case GEN_OCL_SAT_CONV_U16_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_U16);
> +          case GEN_OCL_SAT_CONV_I32_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_S32);
> +          case GEN_OCL_SAT_CONV_U32_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_U32);
> +          case GEN_OCL_SAT_CONV_F32_TO_U8:
> +            DEF(ir::TYPE_U8, ir::TYPE_FLOAT);
> +          case GEN_OCL_SAT_CONV_U16_TO_I16:
> +            DEF(ir::TYPE_S16, ir::TYPE_U16);
> +          case GEN_OCL_SAT_CONV_I32_TO_I16:
> +            DEF(ir::TYPE_S16, ir::TYPE_S32);
> +          case GEN_OCL_SAT_CONV_U32_TO_I16:
> +            DEF(ir::TYPE_S16, ir::TYPE_U32);
> +          case GEN_OCL_SAT_CONV_F32_TO_I16:
> +            DEF(ir::TYPE_S16, ir::TYPE_FLOAT);
> +          case GEN_OCL_SAT_CONV_I16_TO_U16:
> +            DEF(ir::TYPE_U16, ir::TYPE_S16);
> +          case GEN_OCL_SAT_CONV_I32_TO_U16:
> +            DEF(ir::TYPE_U16, ir::TYPE_S32);
> +          case GEN_OCL_SAT_CONV_U32_TO_U16:
> +            DEF(ir::TYPE_U16, ir::TYPE_U32);
> +          case GEN_OCL_SAT_CONV_F32_TO_U16:
> +            DEF(ir::TYPE_U16, ir::TYPE_FLOAT);
> +          case GEN_OCL_SAT_CONV_F32_TO_I32:
> +            DEF(ir::TYPE_S32, ir::TYPE_FLOAT);
> +          case GEN_OCL_SAT_CONV_F32_TO_U32:
> +            DEF(ir::TYPE_U32, ir::TYPE_FLOAT);
> +#undef DEF
>            default: break;
>          }
>        }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 321fc4e..00dc8ab 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -146,3 +146,31 @@ DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
>  
>  // get sampler info
>  DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info)
> +
> +// saturate convert
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8,  _Z16convert_char_sath)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf)
> +
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8,  _Z17convert_uchar_satc)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf)
> +
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf)
> +
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf)
> +
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf)
> +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 170ec70..8dfea09 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -218,9 +218,31 @@ UDEF(uint);
>  UDEF(ulong);
>  #undef UDEF
>  
> -uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) {
> -    return add_sat((uchar)x, (uchar)0);
> -}
> +#define DEF(DSTTYPE, SRCTYPE) \
> +  OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);
> +DEF(char, uchar);
> +DEF(char, short);
> +DEF(char, ushort);
> +DEF(char, int);
> +DEF(char, uint);
> +DEF(char, float);
> +DEF(uchar, char);
> +DEF(uchar, short);
> +DEF(uchar, ushort);
> +DEF(uchar, int);
> +DEF(uchar, uint);
> +DEF(uchar, float);
> +DEF(short, ushort);
> +DEF(short, int);
> +DEF(short, uint);
> +DEF(short, float);
> +DEF(ushort, short);
> +DEF(ushort, int);
> +DEF(ushort, uint);
> +DEF(ushort, float);
> +DEF(int, float);
> +DEF(uint, float);
> +#undef DEF
>  
>  INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); }
>  INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); }
> diff --git a/kernels/builtin_convert_sat.cl b/kernels/builtin_convert_sat.cl
> new file mode 100644
> index 0000000..18d88ab
> --- /dev/null
> +++ b/kernels/builtin_convert_sat.cl
> @@ -0,0 +1,30 @@
> +#define DEF(DSTTYPE, SRCTYPE) \
> +  kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## _sat(global SRCTYPE *src, global DSTTYPE *dst) { \
> +  int i = get_global_id(0); \
> +  dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \
> +}
> +
> +DEF(char, uchar);
> +DEF(char, short);
> +DEF(char, ushort);
> +DEF(char, int);
> +DEF(char, uint);
> +DEF(char, float);
> +DEF(uchar, char);
> +DEF(uchar, short);
> +DEF(uchar, ushort);
> +DEF(uchar, int);
> +DEF(uchar, uint);
> +DEF(uchar, float);
> +DEF(short, ushort);
> +DEF(short, int);
> +DEF(short, uint);
> +DEF(short, float);
> +DEF(ushort, short);
> +DEF(ushort, int);
> +DEF(ushort, uint);
> +DEF(ushort, float);
> +DEF(int, float);
> +DEF(uint, float);
> +#undef DEF
> +
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index 9b93993..72bff84 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -126,6 +126,7 @@ set (utests_sources
>    builtin_num_groups.cpp
>    builtin_local_id.cpp
>    builtin_acos_asin.cpp
> +  builtin_convert_sat.cpp
>    runtime_createcontext.cpp
>    runtime_null_kernel_arg.cpp
>    runtime_event.cpp
> diff --git a/utests/builtin_convert_sat.cpp b/utests/builtin_convert_sat.cpp
> new file mode 100644
> index 0000000..0bf561c
> --- /dev/null
> +++ b/utests/builtin_convert_sat.cpp
> @@ -0,0 +1,69 @@
> +#include <cstdint>
> +#include "utest_helper.hpp"
> +
> +typedef unsigned char uchar;
> +typedef unsigned short ushort;
> +
> +int64_t my_rand(void) {
> +  int64_t x = rand() - RAND_MAX/2;
> +  int64_t y = rand() - RAND_MAX/2;
> +  return x * y;
> +}
> +
> +#define DEF(DST_TYPE, SRC_TYPE, DST_MIN, DST_MAX) \
> +void builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat(void) \
> +{ \
> +  const int n = 128; \
> +  OCL_CREATE_KERNEL_FROM_FILE("builtin_convert_sat", "builtin_convert_" # SRC_TYPE "_to_" # DST_TYPE "_sat"); \
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(SRC_TYPE), NULL); \
> +  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(DST_TYPE), NULL); \
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \
> +  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \
> +  globals[0] = n; \
> +  locals[0] = 16; \
> +  OCL_MAP_BUFFER(0); \
> +  for (int i = 0; i < n; i++) \
> +    ((SRC_TYPE *)buf_data[0])[i] = my_rand(); \
> +  OCL_UNMAP_BUFFER(0); \
> +  OCL_NDRANGE(1); \
> +  OCL_MAP_BUFFER(0); \
> +  OCL_MAP_BUFFER(1); \
> +  for (int i = 0; i < n; i++) { \
> +    SRC_TYPE src = ((SRC_TYPE *)buf_data[0])[i]; \
> +    DST_TYPE dst; \
> +    if (src > DST_MAX) \
> +      dst = DST_MAX; \
> +    else if (src < DST_MIN) \
> +      dst = DST_MIN; \
> +    else \
> +      dst = src; \
> +    OCL_ASSERT(((DST_TYPE *)buf_data[1])[i] == dst); \
> +  } \
> +  OCL_UNMAP_BUFFER(0); \
> +  OCL_UNMAP_BUFFER(1); \
> +} \
> +MAKE_UTEST_FROM_FUNCTION(builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat);
> +
> +DEF(char, uchar, -128, 127);
> +DEF(char, short, -128, 127);
> +DEF(char, ushort, -128, 127);
> +DEF(char, int, -128, 127);
> +DEF(char, uint, -128, 127);
> +DEF(char, float, -128, 127);
> +DEF(uchar, char, 0, 255);
> +DEF(uchar, short, 0, 255);
> +DEF(uchar, ushort, 0, 255);
> +DEF(uchar, int, 0, 255);
> +DEF(uchar, uint, 0, 255);
> +DEF(uchar, float, 0, 255);
> +DEF(short, ushort, -32768, 32767);
> +DEF(short, int, -32768, 32767);
> +DEF(short, uint, -32768, 32767);
> +DEF(short, float, -32768, 32767);
> +DEF(ushort, short, 0, 65535);
> +DEF(ushort, int, 0, 65535);
> +DEF(ushort, uint, 0, 65535);
> +DEF(ushort, float, 0, 65535);
> +DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF);
> +DEF(uint, float, 0, 0xffffffffu);
> +#undef DEF
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list