[Beignet] [PATCH v2 2/3] GBE/runtime: fixup broken 1d array image support.
Zhigang Gong
zhigang.gong at linux.intel.com
Thu Jun 19 00:45:40 PDT 2014
I forgot to change the gen75 bind image function, so please ignore this
version. I have already sent v4 of this patch.
On Thu, Jun 19, 2014 at 03:36:36PM +0800, Zhigang Gong wrote:
> As the sample LD message doesn't support an array index, we have
> to create a 2D array surface with the same buffer object.
> Thus one 1D array image will have two surfaces bound to it:
> one at the index and the second at 128 + index.
>
> Then, on the kernel side, we will access the corresponding
> 2D array surface when the LD message is required; otherwise
> we will access the original 1D array surface.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/backend/gen_insn_selection.cpp | 9 +-
> backend/src/ir/instruction.cpp | 2 +-
> backend/src/ocl_stdlib.tmpl.h | 161 +++++++++++++++++++----------
> src/cl_api.c | 5 +-
> src/cl_command_queue.c | 5 +
> src/cl_device_id.c | 1 +
> src/cl_device_id.h | 1 +
> src/cl_gt_device.h | 1 +
> src/cl_mem.c | 29 +++---
> src/intel/intel_gpgpu.c | 7 +-
> 10 files changed, 149 insertions(+), 72 deletions(-)
>
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index ecb64cd..986aa3e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3606,10 +3606,15 @@ namespace gbe
> msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
> msgLen = srcNum;
> }
> - uint32_t bti = insn.getImageIndex();
> + // We switch to a fixup bti for linear filter on a image1d array sampling.
> + uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
> + if (bti > 253) {
> + std::cerr << "Too large bti " << bti;
> + return false;
> + }
> uint32_t sampler = insn.getSamplerIndex();
>
> - sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset());
> + sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
> return true;
> }
> DECL_CTOR(SampleInstruction, 1, 1);
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index d081235..435869e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -527,7 +527,7 @@ namespace ir {
> uint8_t srcIsFloat:1;
> uint8_t dstIsFloat:1;
> uint8_t samplerIdx:4;
> - uint8_t samplerOffset:1;
> + uint8_t samplerOffset:2;
> uint8_t imageIdx;
> static const uint32_t srcNum = 3;
> static const uint32_t dstNum = 4;
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 605d96d..c43172d 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in
>
> // 2D & 1D Array read
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>
> // 3D & 2D Array read
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
> OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
> OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
> OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>
> // 1D write
> @@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id);
> int __gen_ocl_get_image_channel_data_type(uint surface_id);
> int __gen_ocl_get_image_channel_order(uint surface_id);
> int __gen_ocl_get_image_depth(uint surface_id);
> +/* The printf function. */
> +int __gen_ocl_printf_stub(const char * format, ...);
> +#define printf __gen_ocl_printf_stub
>
> // 2D 3D Image Common Macro
> #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> @@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id);
>
> #define GET_IMAGE(cl_image, surface_id) \
> uint surface_id = (uint)cl_image
> +INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + float array_size = __gen_ocl_get_image_depth(surface_id);
> + return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + float array_size = __gen_ocl_get_image_depth(surface_id);
> + return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + int array_size = __gen_ocl_get_image_depth(surface_id);
> + return clamp(index, 0, array_size - 1);
> +}
>
> -#define DECL_READ_IMAGE0(int_clamping_fix, \
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + int array_size = __gen_ocl_get_image_depth(surface_id);
> + return clamp(index, 0, array_size - 1);
> +}
> +
> +#define DECL_READ_IMAGE0(int_clamping_fix, \
> image_type, type, suffix, coord_type, n) \
> INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \
> const sampler_t sampler, \
> coord_type coord) \
> { \
> GET_IMAGE(cl_image, surface_id); \
> + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \
> if (int_clamping_fix && \
> ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \
> ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \
> return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD(surface_id, sampler, coord), 1); \
> + EXPEND_READ_COORD(surface_id, sampler, coord)); \
> return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\
> + EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \
> }
>
> #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \
> @@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id);
> coord_type coord) \
> { \
> GET_IMAGE(cl_image, surface_id); \
> + GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \
> coord_type tmpCoord = coord; \
> if (float_coord_rounding_fix | int_clamping_fix) { \
> if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
> @@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
> } else \
> intCoord = tmpCoord; \
> return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\
> + EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
> } \
> } \
> } \
> return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
> + EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
> }
>
> #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \
> @@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
> coord_type coord) \
> { \
> GET_IMAGE(cl_image, surface_id); \
> + GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \
> return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD(surface_id, \
> + EXPEND_READ_COORDF(surface_id, \
> CLK_NORMALIZED_COORDS_FALSE \
> | CLK_ADDRESS_NONE \
> - | CLK_FILTER_NEAREST, (float)coord), 0); \
> + | CLK_FILTER_NEAREST, (float)coord), 0); \
> }
>
> #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
> @@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
> DECL_WRITE_IMAGE(image_type, type, suffix, int) \
> DECL_WRITE_IMAGE(image_type, type, suffix, float)
>
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
> #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
> #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
> -
> -#define OUT_OF_BOX(coord, surface, normalized) \
> - (coord < 0 || \
> - ((normalized == 0) \
> - && (coord >= __gen_ocl_get_image_width(surface))) \
> - || ((normalized != 0) && (coord > 0x1p0)))
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> @@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f)
> DECL_IMAGE_INFO_COMMON(image1d_t)
>
> #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> #undef DENORMALIZE_COORD
> #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
> #undef FIXUP_FLOAT_COORD
> #undef DECL_IMAGE
> // End of 1D
> @@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t)
> DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
> DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
> // 2D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> - (int)(coord.s1 < 0 ? -1 : coord.s1)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> + (int)(coord.s1 < 0 ? -1 : coord.s1), 1
> #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
> dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
> #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
>
> -#define OUT_OF_BOX(coord, surface, normalized) \
> - (coord.s0 < 0 || coord.s1 < 0 || \
> - ((normalized == 0) \
> - && (coord.s0 >= __gen_ocl_get_image_width(surface) \
> - || coord.s1 >= __gen_ocl_get_image_height(surface))) \
> - || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0)))
> -
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> @@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
> DECL_IMAGE(0, image2d_t, float4, f, 2)
>
> // 1D Array
> +#undef GET_IMAGE_ARRAY_SIZE
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> + coord_type ai = __gen_compute_array_index(coord.s1, image);
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord) \
> + { \
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> + tmpCoord.s0 += -0x1p-9; \
> + }
> +
> DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
> DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
> DECL_IMAGE(0, image1d_array_t, float4, f, 2)
> @@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> }
>
> #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDI
> +#undef EXPEND_READ_COORDF
> #undef DENORMALIZE_COORD
> #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
> #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
> // End of 2D and 1D Array
>
> // 3D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> - (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> + (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
> #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
> dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
> dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
> #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
> -#define OUT_OF_BOX(coord, surface, normalized) \
> - (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 || \
> - ((normalized == 0) \
> - && (coord.s0 >= __gen_ocl_get_image_width(surface) \
> - || coord.s1 >= __gen_ocl_get_image_height(surface) \
> - || coord.s2 >= __gen_ocl_get_image_depth(surface))) \
> - || ((normalized != 0) \
> - &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1)))
>
> #define FIXUP_FLOAT_COORD(tmpCoord) \
> { \
> @@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20) \
> tmpCoord.s2 += -0x1p-9; \
> }
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>
> DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
> DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
> @@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
> DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
> DECL_IMAGE(0, image3d_t, float4, f, 3)
>
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> + (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
> + dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord) \
> + { \
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20) \
> + tmpCoord.s0 += -0x1p-9; \
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20) \
> + tmpCoord.s1 += -0x1p-9; \
> + }
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> + coord_type ai = __gen_compute_array_index(coord.s2, image);
> +
> // 2D Array
> DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
> DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
> @@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
> }
>
> #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> #undef DENORMALIZE_COORD
> #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
> #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
> // End of 3D and 2D Array
>
> #undef DECL_IMAGE
> @@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
> #undef OVERLOADABLE
> #undef INLINE
>
> -/* The printf function. */
> -int __gen_ocl_printf_stub(const char * format, ...);
> -#define printf __gen_ocl_printf_stub
> -
> #endif /* __GEN_OCL_STDLIB_H__ */
> diff --git a/src/cl_api.c b/src/cl_api.c
> index b17cc52..9e412f6 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context ctx,
> err = CL_INVALID_VALUE;
> goto error;
> }
> - if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
> + if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
> + image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
> + image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
> + image_type != CL_MEM_OBJECT_IMAGE2D &&
> image_type != CL_MEM_OBJECT_IMAGE3D)) {
> err = CL_INVALID_VALUE;
> goto error;
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 1bc97ac..41281f2 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
> image->intel_fmt, image->image_type,
> image->w, image->h, image->depth,
> image->row_pitch, image->tiling);
> + if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> + cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
> + image->intel_fmt, image->image_type,
> + image->w, image->h, image->depth,
> + image->row_pitch, image->tiling);
> }
> return CL_SUCCESS;
> }
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index af8e90c..578b548 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id device,
> DECL_FIELD(IMAGE_SUPPORT, image_support)
> DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
> DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
> + DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
> DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
> DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
> DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index a5449a7..769bfd2 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -51,6 +51,7 @@ struct _cl_device_id {
> cl_uint max_read_image_args;
> cl_uint max_write_image_args;
> size_t image2d_max_width;
> + size_t image_max_array_size;
> size_t image2d_max_height;
> size_t image3d_max_width;
> size_t image3d_max_height;
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index b8bda5e..6d03123 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -41,6 +41,7 @@
> .image_support = CL_TRUE,
> .max_read_image_args = 128,
> .max_write_image_args = 8,
> +.image_max_array_size = 2048,
> .image2d_max_width = 8192,
> .image2d_max_height = 8192,
> .image3d_max_width = 8192,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 491993e..a7a0f59 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -540,7 +540,7 @@ static cl_mem
> _cl_mem_new_image(cl_context ctx,
> cl_mem_flags flags,
> const cl_image_format *fmt,
> - const cl_mem_object_type image_type,
> + const cl_mem_object_type orig_image_type,
> size_t w,
> size_t h,
> size_t depth,
> @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
> {
> cl_int err = CL_SUCCESS;
> cl_mem mem = NULL;
> + cl_mem_object_type image_type = orig_image_type;
> uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
> size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
> cl_image_tiling_t tiling = CL_NO_TILE;
> @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
> image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
> DO_IMAGE_ERROR;
>
> - if (image_type == CL_MEM_OBJECT_IMAGE1D ||
> - image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> + if (image_type == CL_MEM_OBJECT_IMAGE1D) {
> size_t min_pitch = bpp * w;
> if (data && pitch == 0)
> pitch = min_pitch;
> @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
> else if (data && slice_pitch == 0)
> slice_pitch = pitch;
> if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
> - if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
> + if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
> if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
> if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
> if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
> @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
>
> depth = 1;
> } else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
> + image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
> image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> + if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> + h = 1;
> + tiling = CL_NO_TILE;
> + } else if (cl_driver_get_ver(ctx->drv) != 6)
> + tiling = cl_get_default_tiling();
> +
> size_t min_pitch = bpp * w;
> if (data && pitch == 0)
> pitch = min_pitch;
> @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
> slice_pitch = min_slice_pitch;
> if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
> if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
> - if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
> + if (image_type == CL_MEM_OBJECT_IMAGE3D &&
> + (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
> + else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
> if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
> if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
> if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
> if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
>
> - /* Pick up tiling mode (we do only linear on SNB) */
> - if (cl_driver_get_ver(ctx->drv) != 6)
> - tiling = cl_get_default_tiling();
> } else
> assert(0);
>
> @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
> /* Tiling requires to align both pitch and height */
> if (tiling == CL_NO_TILE) {
> aligned_pitch = w * bpp;
> - if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
> - image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
> - image_type == CL_MEM_OBJECT_IMAGE3D)
> - aligned_h = ALIGN(h, valign);
> - else
> - aligned_h = h;
> + aligned_h = ALIGN(h, valign);
> } else if (tiling == CL_TILE_X) {
> aligned_pitch = ALIGN(w * bpp, tilex_w);
> aligned_h = ALIGN(h, tilex_h);
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 197d388..ab4cb0d 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -91,7 +91,7 @@ struct intel_gpgpu
>
> unsigned long img_bitmap; /* image usage bitmap. */
> unsigned int img_index_base; /* base index for image surface.*/
> - drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */
> + drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */
>
> unsigned long sampler_bitmap; /* sampler usage bitmap. */
>
> @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
> memset(ss, 0, sizeof(*ss));
>
> ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
> - ss->ss0.surface_type = intel_get_surface_type(type);
> + if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> + ss->ss0.surface_type = I965_SURFACE_2D;
> + else
> + ss->ss0.surface_type = intel_get_surface_type(type);
> if (intel_is_surface_array(type)) {
> ss->ss0.surface_array = 1;
> ss->ss0.surface_array_spacing = 1;
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list