[Beignet] [PATCH] GBE: Fixed a 3 elements vector load/store bug.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu May 30 19:09:56 PDT 2013
Per the OpenCL spec, for 3-component vector data types, the
size of the data type is 4 * sizeof(component). The LLVM
front end actually casts type3 data to type4 data for load/store
instructions, which breaks our implementation. We need to
fix it up to the actual element count.
Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
backend/src/llvm/llvm_gen_backend.cpp | 17 +++++++++++++++--
kernels/compiler_vector_load_store.cl | 10 +++++-----
utests/compiler_vector_load_store.cpp | 2 +-
3 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index db7d714..2688369 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -368,6 +368,13 @@ namespace gbe
const auto key = std::make_pair(value, index);
return scalarMap.find(key) != scalarMap.end();
}
+ /*! Calls getRealValue(value, index), then returns true iff value is a Constant that is an UndefValue; false otherwise. */
+ bool isUndefConst(Value *value, uint32_t index) {
+ getRealValue(value, index);
+
+ Constant *CPV = dyn_cast<Constant>(value);
+ return (CPV && (isa<UndefValue>(CPV)));
+ }
private:
/*! This creates a scalar register for a Value (index is the vector index when
* the value is a vector of scalars)
@@ -2157,10 +2164,16 @@ namespace gbe
Type *elemType = vectorType->getElementType();
// We follow OCL spec and support 2,3,4,8,16 elements only
- const uint32_t elemNum = vectorType->getNumElements();
+ uint32_t elemNum = vectorType->getNumElements();
GBE_ASSERTM(elemNum == 2 || elemNum == 3 || elemNum == 4 || elemNum == 8 || elemNum == 16,
"Only vectors of 2,3,4,8 or 16 elements are supported");
-
+ // Per OpenCL 1.2 spec 6.1.5:
+ // For 3-component vector data types, the size of the data type is 4 * sizeof(component).
+ // LLVM casts type3 data to type4 for load/store instructions,
+ // so a 4-element vector may have only 3 valid elements. We need to fix it up to the correct
+ // element count here.
+ if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3))
+ elemNum = 3;
// The code is going to be fairly different from types to types (based on
// size of each vector element)
const ir::Type type = getType(ctx, elemType);
diff --git a/kernels/compiler_vector_load_store.cl b/kernels/compiler_vector_load_store.cl
index b362412..30f0e1e 100644
--- a/kernels/compiler_vector_load_store.cl
+++ b/kernels/compiler_vector_load_store.cl
@@ -18,12 +18,12 @@ __kernel void test_##type ##n(__global type *pin, \
}
#define TEST_ALL_TYPE(n) \
- TEST_TYPE(char,n) \
+ TEST_TYPE(char,n) \
TEST_TYPE(uchar,n) \
TEST_TYPE(short,n) \
- TEST_TYPE(ushort,n) \
- TEST_TYPE(int,n) \
- TEST_TYPE(uint,n) \
+ TEST_TYPE(ushort,n)\
+ TEST_TYPE(int,n) \
+ TEST_TYPE(uint,n) \
TEST_TYPE(float,n)
#if 0
@@ -34,7 +34,7 @@ __kernel void test_##type ##n(__global type *pin, \
#endif
TEST_ALL_TYPE(2)
-//TEST_ALL_TYPE(3)
+TEST_ALL_TYPE(3)
TEST_ALL_TYPE(4)
TEST_ALL_TYPE(8)
TEST_ALL_TYPE(16)
diff --git a/utests/compiler_vector_load_store.cpp b/utests/compiler_vector_load_store.cpp
index 76c12a1..79f284f 100644
--- a/utests/compiler_vector_load_store.cpp
+++ b/utests/compiler_vector_load_store.cpp
@@ -42,7 +42,7 @@ MAKE_UTEST_FROM_FUNCTION(compiler_vector_ ## kernel_type ##n ##_load_store);
#define test_all_vector(type, kernel_type) \
compiler_vector_load_store(type, 2, kernel_type) \
- /*compiler_vector_load_store(type, 3, kernel_type)*/ \
+ compiler_vector_load_store(type, 3, kernel_type) \
compiler_vector_load_store(type, 4, kernel_type) \
compiler_vector_load_store(type, 8, kernel_type) \
compiler_vector_load_store(type, 16, kernel_type)
--
1.7.11.7
More information about the Beignet
mailing list