[Beignet] [PATCH v2 2/3] GBE/runtime: fixup broken 1d array image support.
Zhigang Gong
zhigang.gong at intel.com
Thu Jun 19 00:36:36 PDT 2014
As the sample LD message doesn't support an array index, we have
to create a 2D array surface backed by the same buffer object.
Thus each 1D array image will have two surfaces bound to it:
one at binding table index `index` and the second at 128 + index.
Then, on the kernel side, we access the corresponding
2D array surface when the LD message is required; otherwise
we access the original 1D array surface.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 9 +-
backend/src/ir/instruction.cpp | 2 +-
backend/src/ocl_stdlib.tmpl.h | 161 +++++++++++++++++++----------
src/cl_api.c | 5 +-
src/cl_command_queue.c | 5 +
src/cl_device_id.c | 1 +
src/cl_device_id.h | 1 +
src/cl_gt_device.h | 1 +
src/cl_mem.c | 29 +++---
src/intel/intel_gpgpu.c | 7 +-
10 files changed, 149 insertions(+), 72 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ecb64cd..986aa3e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3606,10 +3606,15 @@ namespace gbe
msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
msgLen = srcNum;
}
- uint32_t bti = insn.getImageIndex();
+ // We switch to a fixup bti for linear filter on a image1d array sampling.
+ uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
+ if (bti > 253) {
+ std::cerr << "Too large bti " << bti;
+ return false;
+ }
uint32_t sampler = insn.getSamplerIndex();
- sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset());
+ sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
return true;
}
DECL_CTOR(SampleInstruction, 1, 1);
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index d081235..435869e 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -527,7 +527,7 @@ namespace ir {
uint8_t srcIsFloat:1;
uint8_t dstIsFloat:1;
uint8_t samplerIdx:4;
- uint8_t samplerOffset:1;
+ uint8_t samplerOffset:2;
uint8_t imageIdx;
static const uint32_t srcNum = 3;
static const uint32_t dstNum = 4;
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 605d96d..c43172d 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in
// 2D & 1D Array read
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
// 3D & 2D Array read
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
// 1D write
@@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id);
int __gen_ocl_get_image_channel_data_type(uint surface_id);
int __gen_ocl_get_image_channel_order(uint surface_id);
int __gen_ocl_get_image_depth(uint surface_id);
+/* The printf function. */
+int __gen_ocl_printf_stub(const char * format, ...);
+#define printf __gen_ocl_printf_stub
// 2D 3D Image Common Macro
#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
@@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id);
#define GET_IMAGE(cl_image, surface_id) \
uint surface_id = (uint)cl_image
+INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ float array_size = __gen_ocl_get_image_depth(surface_id);
+ return clamp(rint(index), 0.f, array_size - 1.f);
+}
+
+INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ float array_size = __gen_ocl_get_image_depth(surface_id);
+ return clamp(rint(index), 0.f, array_size - 1.f);
+}
+
+INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ int array_size = __gen_ocl_get_image_depth(surface_id);
+ return clamp(index, 0, array_size - 1);
+}
-#define DECL_READ_IMAGE0(int_clamping_fix, \
+INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ int array_size = __gen_ocl_get_image_depth(surface_id);
+ return clamp(index, 0, array_size - 1);
+}
+
+#define DECL_READ_IMAGE0(int_clamping_fix, \
image_type, type, suffix, coord_type, n) \
INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \
const sampler_t sampler, \
coord_type coord) \
{ \
GET_IMAGE(cl_image, surface_id); \
+ GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \
if (int_clamping_fix && \
((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \
((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \
return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, sampler, coord), 1); \
+ EXPEND_READ_COORD(surface_id, sampler, coord)); \
return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\
+ EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \
}
#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \
@@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id);
coord_type coord) \
{ \
GET_IMAGE(cl_image, surface_id); \
+ GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \
coord_type tmpCoord = coord; \
if (float_coord_rounding_fix | int_clamping_fix) { \
if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
@@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
} else \
intCoord = tmpCoord; \
return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\
+ EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
} \
} \
} \
return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
+ EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
}
#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \
@@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
coord_type coord) \
{ \
GET_IMAGE(cl_image, surface_id); \
+ GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \
return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, \
+ EXPEND_READ_COORDF(surface_id, \
CLK_NORMALIZED_COORDS_FALSE \
| CLK_ADDRESS_NONE \
- | CLK_FILTER_NEAREST, (float)coord), 0); \
+ | CLK_FILTER_NEAREST, (float)coord), 0); \
}
#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
@@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
DECL_WRITE_IMAGE(image_type, type, suffix, int) \
DECL_WRITE_IMAGE(image_type, type, suffix, float)
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
-
-#define OUT_OF_BOX(coord, surface, normalized) \
- (coord < 0 || \
- ((normalized == 0) \
- && (coord >= __gen_ocl_get_image_width(surface))) \
- || ((normalized != 0) && (coord > 0x1p0)))
+#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
@@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f)
DECL_IMAGE_INFO_COMMON(image1d_t)
#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
#undef DENORMALIZE_COORD
#undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
#undef FIXUP_FLOAT_COORD
#undef DECL_IMAGE
// End of 1D
@@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t)
DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
// 2D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
+ (int)(coord.s1 < 0 ? -1 : coord.s1), 1
#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
-#define OUT_OF_BOX(coord, surface, normalized) \
- (coord.s0 < 0 || coord.s1 < 0 || \
- ((normalized == 0) \
- && (coord.s0 >= __gen_ocl_get_image_width(surface) \
- || coord.s1 >= __gen_ocl_get_image_height(surface))) \
- || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0)))
-
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
@@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
DECL_IMAGE(0, image2d_t, float4, f, 2)
// 1D Array
+#undef GET_IMAGE_ARRAY_SIZE
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef FIXUP_FLOAT_COORD
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
+#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
+ coord_type ai = __gen_compute_array_index(coord.s1, image);
+
+#define FIXUP_FLOAT_COORD(tmpCoord) \
+ { \
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
+ tmpCoord.s0 += -0x1p-9; \
+ }
+
DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
DECL_IMAGE(0, image1d_array_t, float4, f, 2)
@@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
}
#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDI
+#undef EXPEND_READ_COORDF
#undef DENORMALIZE_COORD
#undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
#undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
// End of 2D and 1D Array
// 3D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
+ (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
-#define OUT_OF_BOX(coord, surface, normalized) \
- (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 || \
- ((normalized == 0) \
- && (coord.s0 >= __gen_ocl_get_image_width(surface) \
- || coord.s1 >= __gen_ocl_get_image_height(surface) \
- || coord.s2 >= __gen_ocl_get_image_depth(surface))) \
- || ((normalized != 0) \
- &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1)))
#define FIXUP_FLOAT_COORD(tmpCoord) \
{ \
@@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \
tmpCoord.s2 += -0x1p-9; \
}
+#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
@@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
DECL_IMAGE(0, image3d_t, float4, f, 3)
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
+ (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
+ dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
+
+#define FIXUP_FLOAT_COORD(tmpCoord) \
+ { \
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
+ tmpCoord.s0 += -0x1p-9; \
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
+ tmpCoord.s1 += -0x1p-9; \
+ }
+#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
+ coord_type ai = __gen_compute_array_index(coord.s2, image);
+
// 2D Array
DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
@@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
}
#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
#undef DENORMALIZE_COORD
#undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
#undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
// End of 3D and 2D Array
#undef DECL_IMAGE
@@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
#undef OVERLOADABLE
#undef INLINE
-/* The printf function. */
-int __gen_ocl_printf_stub(const char * format, ...);
-#define printf __gen_ocl_printf_stub
-
#endif /* __GEN_OCL_STDLIB_H__ */
diff --git a/src/cl_api.c b/src/cl_api.c
index b17cc52..9e412f6 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx,
err = CL_INVALID_VALUE;
goto error;
}
- if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+ if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
+ image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
+ image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
+ image_type != CL_MEM_OBJECT_IMAGE2D &&
image_type != CL_MEM_OBJECT_IMAGE3D)) {
err = CL_INVALID_VALUE;
goto error;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 1bc97ac..41281f2 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
image->intel_fmt, image->image_type,
image->w, image->h, image->depth,
image->row_pitch, image->tiling);
+ if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
+ image->intel_fmt, image->image_type,
+ image->w, image->h, image->depth,
+ image->row_pitch, image->tiling);
}
return CL_SUCCESS;
}
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index af8e90c..578b548 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(IMAGE_SUPPORT, image_support)
DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
+ DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index a5449a7..769bfd2 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -51,6 +51,7 @@ struct _cl_device_id {
cl_uint max_read_image_args;
cl_uint max_write_image_args;
size_t image2d_max_width;
+ size_t image_max_array_size;
size_t image2d_max_height;
size_t image3d_max_width;
size_t image3d_max_height;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index b8bda5e..6d03123 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -41,6 +41,7 @@
.image_support = CL_TRUE,
.max_read_image_args = 128,
.max_write_image_args = 8,
+.image_max_array_size = 2048,
.image2d_max_width = 8192,
.image2d_max_height = 8192,
.image3d_max_width = 8192,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 491993e..a7a0f59 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -540,7 +540,7 @@ static cl_mem
_cl_mem_new_image(cl_context ctx,
cl_mem_flags flags,
const cl_image_format *fmt,
- const cl_mem_object_type image_type,
+ const cl_mem_object_type orig_image_type,
size_t w,
size_t h,
size_t depth,
@@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
{
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
+ cl_mem_object_type image_type = orig_image_type;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
cl_image_tiling_t tiling = CL_NO_TILE;
@@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
DO_IMAGE_ERROR;
- if (image_type == CL_MEM_OBJECT_IMAGE1D ||
- image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
else if (data && slice_pitch == 0)
slice_pitch = pitch;
if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
- if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ h = 1;
+ tiling = CL_NO_TILE;
+ } else if (cl_driver_get_ver(ctx->drv) != 6)
+ tiling = cl_get_default_tiling();
+
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
slice_pitch = min_slice_pitch;
if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
- if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
+ if (image_type == CL_MEM_OBJECT_IMAGE3D &&
+ (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
+ else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
- /* Pick up tiling mode (we do only linear on SNB) */
- if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling();
} else
assert(0);
@@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
/* Tiling requires to align both pitch and height */
if (tiling == CL_NO_TILE) {
aligned_pitch = w * bpp;
- if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
- image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
- image_type == CL_MEM_OBJECT_IMAGE3D)
- aligned_h = ALIGN(h, valign);
- else
- aligned_h = h;
+ aligned_h = ALIGN(h, valign);
} else if (tiling == CL_TILE_X) {
aligned_pitch = ALIGN(w * bpp, tilex_w);
aligned_h = ALIGN(h, tilex_h);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 197d388..ab4cb0d 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -91,7 +91,7 @@ struct intel_gpgpu
unsigned long img_bitmap; /* image usage bitmap. */
unsigned int img_index_base; /* base index for image surface.*/
- drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */
+ drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */
unsigned long sampler_bitmap; /* sampler usage bitmap. */
@@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
memset(ss, 0, sizeof(*ss));
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- ss->ss0.surface_type = intel_get_surface_type(type);
+ if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ else
+ ss->ss0.surface_type = intel_get_surface_type(type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
--
1.8.3.2
More information about the Beignet
mailing list