[Beignet] [PATCH 4/4] CL/Runtime: workaround the unused sampler_t kernel argument.
Zhigang Gong
zhigang.gong at intel.com
Sun Nov 24 21:11:09 PST 2013
Current implementation is to use a normal integer to represent
a sampler_t, then later when the sampler is used in read_image
or get_sampler_info, the backend will fixup its type to SAMPLER.
But some test case in piglit will define a sampler_t kernel argument
with an empty kernel budy. Then we will not have a chance to fixup
the kernel argument type to sampler, then we will fail at runtime side.
To workaround this issue, we change the sampler_t to short type.
Then when the user call clSetKernelArg to set a sampler, it will pass
in a pointer size with a short value argument type. It will fail
the size checking logic, then we fixup its type to sampler there.
As this workaround will only take effect when error occur, it will
not bring too much side effect to the normal cases. And it can
pass the existing test cases.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/llvm/llvm_gen_backend.cpp | 2 +-
backend/src/llvm/llvm_gen_ocl_function.hxx | 26 +++++++++++++-------------
backend/src/ocl_stdlib.tmpl.h | 28 ++++++++++++++--------------
src/cl_kernel.c | 10 ++++++++--
src/cl_sampler.c | 2 +-
5 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 9c85c1a..ac49598 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2183,7 +2183,7 @@ namespace gbe
// This is not a kernel argument sampler, we need to append it to sampler set,
// and allocate a sampler slot for it.
auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
- GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
+ GBE_ASSERTM(x.type == ir::TYPE_U16 || x.type == ir::TYPE_S16, "Invalid sampler type");
sampler = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
} else {
sampler = this->getRegister(*AI);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 71034ab..5fe275a 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -43,19 +43,19 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
// To read_image functions.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE0, _Z21__gen_ocl_read_imageijjiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE1, _Z21__gen_ocl_read_imageijjffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE2, _Z22__gen_ocl_read_imageuijjiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijjffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjjiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjjffj)
-
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijjiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijjfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijjiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijjfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjjiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjjfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE0, _Z21__gen_ocl_read_imageijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE1, _Z21__gen_ocl_read_imageijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE2, _Z22__gen_ocl_read_imageuijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjtffj)
+
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjtfffj)
// To write_image functions.
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 62f5f78..0ee5192 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -85,7 +85,7 @@ struct _image2d_t;
typedef __texture struct _image2d_t* __image2d_t;
struct _image3d_t;
typedef __texture struct _image3d_t* __image3d_t;
-typedef const uint __sampler_t;
+typedef const ushort __sampler_t;
typedef size_t __event_t;
#define image2d_t __image2d_t
#define image3d_t __image3d_t
@@ -2468,19 +2468,19 @@ int __gen_ocl_force_simd16(void);
// Image access functions
/////////////////////////////////////////////////////////////////////////////
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, uint sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, uint sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, uint sampler, float u, float v, float w, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, int4 color);
@@ -2500,7 +2500,7 @@ int __gen_ocl_get_image_height(uint surface_id);
int __gen_ocl_get_image_channel_data_type(uint surface_id);
int __gen_ocl_get_image_channel_order(uint surface_id);
int __gen_ocl_get_image_depth(uint surface_id);
-ushort __gen_ocl_get_sampler_info(uint sampler_id);
+ushort __gen_ocl_get_sampler_info(sampler_t sampler);
#define GET_IMAGE(cl_image, surface_id) \
uint surface_id = (uint)cl_image
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 803c9e5..6a0c8e6 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -105,8 +105,14 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
arg_type = gbe_kernel_get_arg_type(k->opaque, index);
arg_sz = gbe_kernel_get_arg_size(k->opaque, index);
- if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz))
- return CL_INVALID_ARG_SIZE;
+ if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
+ if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz == sizeof(cl_sampler)) {
+ /* FIXME, this is a workaround for the case when a kernel arg
+ defined a sampler_t but doesn't use it.*/
+ arg_type = GBE_ARG_SAMPLER;
+ } else
+ return CL_INVALID_ARG_SIZE;
+ }
if(UNLIKELY(arg_type == GBE_ARG_LOCAL_PTR && sz == 0))
return CL_INVALID_ARG_SIZE;
diff --git a/src/cl_sampler.c b/src/cl_sampler.c
index 3e7961f..3e88128 100644
--- a/src/cl_sampler.c
+++ b/src/cl_sampler.c
@@ -67,7 +67,7 @@ int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler)
}
}
}
- assert(0);
+ return -1;
}
LOCAL cl_sampler
--
1.7.9.5
More information about the Beignet
mailing list