[Beignet] [PATCH] Add the builtin function abs() and the according test case

Song, Ruiling ruiling.song at intel.com
Tue Jun 25 01:59:54 PDT 2013


Hi Junyan,

Per spec, abs() should accept all integer type(char/uchar/short/ushort/int/uint) and corresponding vector type.
It is appreciated if you help add the vector type and also other data types.
Your patch still fails piglit test as missing vector type of char/uchar/short/ushort/uint.
Please also consider add test case to cover other data types.

Thanks!
Ruiling
-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of junyan.he at inbox.com
Sent: Tuesday, June 25, 2013 3:51 PM
To: beignet at lists.freedesktop.org
Cc: Junyan He
Subject: [Beignet] [PATCH] Add the builtin function abs() and the according test case

From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   10 ++++-
 backend/src/llvm/llvm_gen_backend.cpp      |   10 ++++-
 backend/src/llvm/llvm_gen_ocl_function.hxx |    3 +-
 backend/src/ocl_stdlib.h                   |    4 ++
 kernels/compiler_abs.cl                    |    5 +++
 utests/CMakeLists.txt                      |    1 +
 utests/compiler_abs.cpp                    |   61 ++++++++++++++++++++++++++++
 7 files changed, 91 insertions(+), 3 deletions(-)  create mode 100644 kernels/compiler_abs.cl  create mode 100644 utests/compiler_abs.cpp

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 5901419..15b1bd8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1216,7 +1216,15 @@ namespace gbe
       const GenRegister dst = sel.selReg(insn.getDst(0));
       const GenRegister src = sel.selReg(insn.getSrc(0));
       switch (opcode) {
-        case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
+        case ir::OP_ABS:
+          if (insn.getType() == ir::TYPE_S32 || insn.getType() == ir::TYPE_U32) {
+            const GenRegister src_ = GenRegister::retype(src, GEN_TYPE_D);
+            const GenRegister dst_ = GenRegister::retype(dst, GEN_TYPE_D);
+            sel.MOV(dst_, GenRegister::abs(src_));
+          } else {
+            sel.MOV(dst, GenRegister::abs(src));
+          }
+	  break;
         case ir::OP_MOV:
           if (dst.isdf()) {
             ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 5b7754c..9a13070 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1686,6 +1686,7 @@ namespace gbe
       case GEN_OCL_POW:
       case GEN_OCL_RCP:
       case GEN_OCL_ABS:
+      case GEN_OCL_FABS:
       case GEN_OCL_RNDZ:
       case GEN_OCL_RNDE:
       case GEN_OCL_RNDU:
@@ -1842,13 +1843,20 @@ namespace gbe
             ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
             break;
           }
+          case GEN_OCL_ABS:
+          {
+            const ir::Register src = this->getRegister(*AI);
+            const ir::Register dst = this->getRegister(&I);
+            ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src);
+            break;
+          }
           case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
           case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
           case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
           case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
-          case GEN_OCL_ABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+          case GEN_OCL_FABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); 
+ break;
           case GEN_OCL_RNDZ: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
           case GEN_OCL_RNDE: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break;
           case GEN_OCL_RNDU: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6cd7298..9cfad78 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -19,7 +19,8 @@ DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, __gen_ocl_get_global_offset2)  DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
 
 // Math function
-DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_fabs)
+DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs) DECL_LLVM_GEN_FUNCTION(FABS, 
+__gen_ocl_fabs)
 DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)  DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)  DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 81a0193..eaf8f21 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4337,6 +4337,7 @@ INLINE uint get_global_id(uint dim) {  /////////////////////////////////////////////////////////////////////////////
 // Math Functions (see 6.11.2 of OCL 1.1 spec)  /////////////////////////////////////////////////////////////////////////////
+PURE CONST int __gen_ocl_abs(int x);
 PURE CONST float __gen_ocl_fabs(float x);  PURE CONST float __gen_ocl_sin(float x);  PURE CONST float __gen_ocl_cos(float x); @@ -4515,6 +4516,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {  // XXX work-around PTX profile  #define sqrt native_sqrt  INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
+INLINE_OVERLOADABLE int __gen_ocl_internal_abs(int x)  { return 
+__gen_ocl_abs(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x)  { return __gen_ocl_fabs(x); }  INLINE_OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }  INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); } @@ -4860,6 +4862,7 @@ DECL_VECTOR_1OP(native_exp10, float);  DECL_VECTOR_1OP(__gen_ocl_internal_expm1, float);  DECL_VECTOR_1OP(__gen_ocl_internal_cbrt, float);  DECL_VECTOR_1OP(__gen_ocl_internal_fabs, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_abs, int);
 DECL_VECTOR_1OP(__gen_ocl_internal_trunc, float);  DECL_VECTOR_1OP(__gen_ocl_internal_round, float);  DECL_VECTOR_1OP(__gen_ocl_internal_floor, float); @@ -4987,6 +4990,7 @@ INLINE_OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(fl  INLINE_OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}
 
 // XXX workaround ptx profile
+#define abs __gen_ocl_internal_abs
 #define fabs __gen_ocl_internal_fabs
 #define trunc __gen_ocl_internal_trunc
 #define round __gen_ocl_internal_round
diff --git a/kernels/compiler_abs.cl b/kernels/compiler_abs.cl new file mode 100644 index 0000000..7030a26
--- /dev/null
+++ b/kernels/compiler_abs.cl
@@ -0,0 +1,5 @@
+kernel void compiler_abs(global int *src, global int *dst) {
+  int i = get_global_id(0);
+  dst[i] = abs(src[i]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 31152b0..3c8f6ce 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -31,6 +31,7 @@ set (utests_sources
   compiler_double_2.cpp
   compiler_double_3.cpp
   compiler_fabs.cpp
+  compiler_abs.cpp
   compiler_fill_image.cpp
   compiler_fill_image0.cpp
   compiler_fill_image_3d.cpp
diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp new file mode 100644 index 0000000..908a32a
--- /dev/null
+++ b/utests/compiler_abs.cpp
@@ -0,0 +1,61 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+    int f = src[global_id];
+    f = f < 0 ? -f : f;
+    dst[global_id] = f;
+}
+
+void compiler_abs(void)
+{
+    const size_t n = 16;
+    int cpu_dst[16], cpu_src[16];
+
+    // Setup kernel and buffers
+    OCL_CREATE_KERNEL("compiler_abs");
+    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+    OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+    OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+    globals[0] = 16;
+    locals[0] = 16;
+
+    // Run random tests
+    for (uint32_t pass = 0; pass < 8; ++pass) {
+        OCL_MAP_BUFFER(0);
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            cpu_src[i] = ((int*)buf_data[0])[i] = (rand() & 15) - 7;
+
+        // Run the kernel on GPU
+        OCL_NDRANGE(1);
+
+        // Run on CPU
+        for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, 
+ cpu_dst);
+
+        // Compare
+        OCL_MAP_BUFFER(1);
+
+#if 0
+        printf("Raw DATA: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", ((int *)buf_data[0])[i]);
+        }
+
+        printf("\nCPU: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", cpu_dst[i]);
+        }
+        printf("\nGPU: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", ((int *)buf_data[1])[i]);
+        }
+#endif
+
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]);
+        OCL_UNMAP_BUFFER(1);
+        OCL_UNMAP_BUFFER(0);
+    }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_abs);
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list