[Beignet] [PATCH] Add the builtin function abs() and the according test case

junyan.he at inbox.com junyan.he at inbox.com
Tue Jun 25 00:50:54 PDT 2013


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   10 ++++-
 backend/src/llvm/llvm_gen_backend.cpp      |   10 ++++-
 backend/src/llvm/llvm_gen_ocl_function.hxx |    3 +-
 backend/src/ocl_stdlib.h                   |    4 ++
 kernels/compiler_abs.cl                    |    5 +++
 utests/CMakeLists.txt                      |    1 +
 utests/compiler_abs.cpp                    |   61 ++++++++++++++++++++++++++++
 7 files changed, 91 insertions(+), 3 deletions(-)
 create mode 100644 kernels/compiler_abs.cl
 create mode 100644 utests/compiler_abs.cpp

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 5901419..15b1bd8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1216,7 +1216,15 @@ namespace gbe
       const GenRegister dst = sel.selReg(insn.getDst(0));
       const GenRegister src = sel.selReg(insn.getSrc(0));
       switch (opcode) {
-        case ir::OP_ABS: sel.MOV(dst, GenRegister::abs(src)); break;
+        case ir::OP_ABS:
+          if (insn.getType() == ir::TYPE_S32 || insn.getType() == ir::TYPE_U32) {
+            const GenRegister src_ = GenRegister::retype(src, GEN_TYPE_D);
+            const GenRegister dst_ = GenRegister::retype(dst, GEN_TYPE_D);
+            sel.MOV(dst_, GenRegister::abs(src_));
+          } else {
+            sel.MOV(dst, GenRegister::abs(src));
+          }
+	  break;
         case ir::OP_MOV:
           if (dst.isdf()) {
             ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 5b7754c..9a13070 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1686,6 +1686,7 @@ namespace gbe
       case GEN_OCL_POW:
       case GEN_OCL_RCP:
       case GEN_OCL_ABS:
+      case GEN_OCL_FABS:
       case GEN_OCL_RNDZ:
       case GEN_OCL_RNDE:
       case GEN_OCL_RNDU:
@@ -1842,13 +1843,20 @@ namespace gbe
             ctx.POW(ir::TYPE_FLOAT, dst, src0, src1);
             break;
           }
+          case GEN_OCL_ABS:
+          {
+            const ir::Register src = this->getRegister(*AI);
+            const ir::Register dst = this->getRegister(&I);
+            ctx.ALU1(ir::OP_ABS, ir::TYPE_S32, dst, src);
+            break;
+          }
           case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break;
           case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break;
           case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break;
           case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break;
           case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break;
           case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break;
-          case GEN_OCL_ABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
+          case GEN_OCL_FABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break;
           case GEN_OCL_RNDZ: this->emitUnaryCallInst(I,CS,ir::OP_RNDZ); break;
           case GEN_OCL_RNDE: this->emitUnaryCallInst(I,CS,ir::OP_RNDE); break;
           case GEN_OCL_RNDU: this->emitUnaryCallInst(I,CS,ir::OP_RNDU); break;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6cd7298..9cfad78 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -19,7 +19,8 @@ DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, __gen_ocl_get_global_offset2)
 DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim)
 
 // Math function
-DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_fabs)
+DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
+DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs)
 DECL_LLVM_GEN_FUNCTION(COS, __gen_ocl_cos)
 DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin)
 DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 81a0193..eaf8f21 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4337,6 +4337,7 @@ INLINE uint get_global_id(uint dim) {
 /////////////////////////////////////////////////////////////////////////////
 // Math Functions (see 6.11.2 of OCL 1.1 spec)
 /////////////////////////////////////////////////////////////////////////////
+PURE CONST int __gen_ocl_abs(int x);
 PURE CONST float __gen_ocl_fabs(float x);
 PURE CONST float __gen_ocl_sin(float x);
 PURE CONST float __gen_ocl_cos(float x);
@@ -4515,6 +4516,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {
 // XXX work-around PTX profile
 #define sqrt native_sqrt
 INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
+INLINE_OVERLOADABLE int __gen_ocl_internal_abs(int x)  { return __gen_ocl_abs(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x)  { return __gen_ocl_fabs(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); }
@@ -4860,6 +4862,7 @@ DECL_VECTOR_1OP(native_exp10, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_expm1, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_cbrt, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_fabs, float);
+DECL_VECTOR_1OP(__gen_ocl_internal_abs, int);
 DECL_VECTOR_1OP(__gen_ocl_internal_trunc, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_round, float);
 DECL_VECTOR_1OP(__gen_ocl_internal_floor, float);
@@ -4987,6 +4990,7 @@ INLINE_OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(fl
 INLINE_OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}
 
 // XXX workaround ptx profile
+#define abs __gen_ocl_internal_abs
 #define fabs __gen_ocl_internal_fabs
 #define trunc __gen_ocl_internal_trunc
 #define round __gen_ocl_internal_round
diff --git a/kernels/compiler_abs.cl b/kernels/compiler_abs.cl
new file mode 100644
index 0000000..7030a26
--- /dev/null
+++ b/kernels/compiler_abs.cl
@@ -0,0 +1,5 @@
+kernel void compiler_abs(global int *src, global int *dst) {
+  int i = get_global_id(0);
+  dst[i] = abs(src[i]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 31152b0..3c8f6ce 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -31,6 +31,7 @@ set (utests_sources
   compiler_double_2.cpp
   compiler_double_3.cpp
   compiler_fabs.cpp
+  compiler_abs.cpp
   compiler_fill_image.cpp
   compiler_fill_image0.cpp
   compiler_fill_image_3d.cpp
diff --git a/utests/compiler_abs.cpp b/utests/compiler_abs.cpp
new file mode 100644
index 0000000..908a32a
--- /dev/null
+++ b/utests/compiler_abs.cpp
@@ -0,0 +1,61 @@
+#include "utest_helper.hpp"
+
+static void cpu(int global_id, int *src, int *dst) {
+    int f = src[global_id];
+    f = f < 0 ? -f : f;
+    dst[global_id] = f;
+}
+
+void compiler_abs(void)
+{
+    const size_t n = 16;
+    int cpu_dst[16], cpu_src[16];
+
+    // Setup kernel and buffers
+    OCL_CREATE_KERNEL("compiler_abs");
+    OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+    OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+    OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+    globals[0] = 16;
+    locals[0] = 16;
+
+    // Run random tests
+    for (uint32_t pass = 0; pass < 8; ++pass) {
+        OCL_MAP_BUFFER(0);
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            cpu_src[i] = ((int*)buf_data[0])[i] = (rand() & 15) - 7;
+
+        // Run the kernel on GPU
+        OCL_NDRANGE(1);
+
+        // Run on CPU
+        for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst);
+
+        // Compare
+        OCL_MAP_BUFFER(1);
+
+#if 0
+        printf("Raw DATA: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", ((int *)buf_data[0])[i]);
+        }
+
+        printf("\nCPU: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", cpu_dst[i]);
+        }
+        printf("\nGPU: \n");
+        for (int32_t i = 0; i < (int32_t) n; ++i) {
+            printf(" %d", ((int *)buf_data[1])[i]);
+        }
+#endif
+
+        for (int32_t i = 0; i < (int32_t) n; ++i)
+            OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]);
+        OCL_UNMAP_BUFFER(1);
+        OCL_UNMAP_BUFFER(0);
+    }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_abs);
-- 
1.7.9.5



More information about the Beignet mailing list