[Beignet] [PATCH] GBE: Fixed a 3 elements vector load/store bug.

Zhigang Gong zhigang.gong at linux.intel.com
Thu May 30 19:09:56 PDT 2013


Per the OpenCL spec, for 3-component vector data types, the
size of the data type is 4 * sizeof(component). The LLVM front
end therefore casts type3 data to type4 data for load/store
instructions, which breaks our implementation. We need to
fix the element count up to the actual number of elements.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp | 17 +++++++++++++++--
 kernels/compiler_vector_load_store.cl | 10 +++++-----
 utests/compiler_vector_load_store.cpp |  2 +-
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index db7d714..2688369 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -368,6 +368,13 @@ namespace gbe
       const auto key = std::make_pair(value, index);
       return scalarMap.find(key) != scalarMap.end();
     }
+    /*! If it's an undef const value, return true. Otherwise, return false. */
+    bool isUndefConst(Value *value, uint32_t index) {
+      getRealValue(value, index);
+
+      Constant *CPV = dyn_cast<Constant>(value);
+      return (CPV && (isa<UndefValue>(CPV)));
+    }
   private:
     /*! This creates a scalar register for a Value (index is the vector index when
      *  the value is a vector of scalars)
@@ -2157,10 +2164,16 @@ namespace gbe
       Type *elemType = vectorType->getElementType();
 
       // We follow OCL spec and support 2,3,4,8,16 elements only
-      const uint32_t elemNum = vectorType->getNumElements();
+      uint32_t elemNum = vectorType->getNumElements();
       GBE_ASSERTM(elemNum == 2 || elemNum == 3 || elemNum == 4 || elemNum == 8 || elemNum == 16,
                   "Only vectors of 2,3,4,8 or 16 elements are supported");
-
+      // Per OpenCL 1.2 spec 6.1.5:
+      //   For 3-component vector data types, the size of the data type is 4 * sizeof(component).
+      // LLVM casts type3 data to type4 for load/store instructions, so a
+      // 4-element vector may have only 3 valid elements. We need to fix elemNum
+      // up to the correct element count here.
+      if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3))
+          elemNum = 3;
       // The code is going to be fairly different from types to types (based on
       // size of each vector element)
       const ir::Type type = getType(ctx, elemType);
diff --git a/kernels/compiler_vector_load_store.cl b/kernels/compiler_vector_load_store.cl
index b362412..30f0e1e 100644
--- a/kernels/compiler_vector_load_store.cl
+++ b/kernels/compiler_vector_load_store.cl
@@ -18,12 +18,12 @@ __kernel void test_##type ##n(__global type *pin, \
 }
 
 #define TEST_ALL_TYPE(n) \
-  TEST_TYPE(char,n) \
+  TEST_TYPE(char,n)  \
   TEST_TYPE(uchar,n) \
   TEST_TYPE(short,n) \
-  TEST_TYPE(ushort,n) \
-  TEST_TYPE(int,n) \
-  TEST_TYPE(uint,n) \
+  TEST_TYPE(ushort,n)\
+  TEST_TYPE(int,n)   \
+  TEST_TYPE(uint,n)  \
   TEST_TYPE(float,n)
 
 #if 0
@@ -34,7 +34,7 @@ __kernel void test_##type ##n(__global type *pin, \
 #endif
 
 TEST_ALL_TYPE(2)
-//TEST_ALL_TYPE(3)
+TEST_ALL_TYPE(3)
 TEST_ALL_TYPE(4)
 TEST_ALL_TYPE(8)
 TEST_ALL_TYPE(16)
diff --git a/utests/compiler_vector_load_store.cpp b/utests/compiler_vector_load_store.cpp
index 76c12a1..79f284f 100644
--- a/utests/compiler_vector_load_store.cpp
+++ b/utests/compiler_vector_load_store.cpp
@@ -42,7 +42,7 @@ MAKE_UTEST_FROM_FUNCTION(compiler_vector_ ## kernel_type ##n ##_load_store);
 
 #define test_all_vector(type, kernel_type) \
   compiler_vector_load_store(type, 2, kernel_type) \
-  /*compiler_vector_load_store(type, 3, kernel_type)*/ \
+  compiler_vector_load_store(type, 3, kernel_type) \
   compiler_vector_load_store(type, 4, kernel_type) \
   compiler_vector_load_store(type, 8, kernel_type) \
   compiler_vector_load_store(type, 16, kernel_type)
-- 
1.7.11.7



More information about the Beignet mailing list