[Beignet] [PATCH v3 2/3] GBE/runtime: fixup broken 1d array image support.

He Junyan junyan.he at inbox.com
Thu Jun 19 23:46:08 PDT 2014


The hw limitation causes us to add a lot of
tricks to our code. Maybe we need to find
some place to document the details so that
others can understand them.

The whole patch set looks OK to me.

On 四, 2014-06-19 at 15:44 +0800, Zhigang Gong wrote:
> As the sample LD message doesn't support an array index, we
> have to create a 2D array surface with the same buffer object.
> Thus one 1D array image will have two surfaces bound to it:
> one at the index and the second at 128 + index.
> 
> Then, on the kernel side, we access the corresponding
> 2D array surface when the LD message is required; otherwise
> we access the original 1D array surface.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   9 +-
>  backend/src/ir/instruction.cpp             |   2 +-
>  backend/src/ocl_stdlib.tmpl.h              | 161 +++++++++++++++++++----------
>  src/cl_api.c                               |   5 +-
>  src/cl_command_queue.c                     |   5 +
>  src/cl_device_id.c                         |   1 +
>  src/cl_device_id.h                         |   1 +
>  src/cl_gt_device.h                         |   1 +
>  src/cl_mem.c                               |  29 +++---
>  src/intel/intel_gpgpu.c                    |  12 ++-
>  10 files changed, 153 insertions(+), 73 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index ecb64cd..986aa3e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3606,10 +3606,15 @@ namespace gbe
>            msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
>          msgLen = srcNum;
>        }
> -      uint32_t bti = insn.getImageIndex();
> +      // We switch to a fixup bti for linear filter on a image1d array sampling.
> +      uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
> +      if (bti > 253) {
> +        std::cerr << "Too large bti " << bti;
> +        return false;
> +      }
>        uint32_t sampler = insn.getSamplerIndex();
>  
> -      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset());
> +      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
>        return true;
>      }
>      DECL_CTOR(SampleInstruction, 1, 1);
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index d081235..435869e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -527,7 +527,7 @@ namespace ir {
>        uint8_t srcIsFloat:1;
>        uint8_t dstIsFloat:1;
>        uint8_t samplerIdx:4;
> -      uint8_t samplerOffset:1;
> +      uint8_t samplerOffset:2;
>        uint8_t imageIdx;
>        static const uint32_t srcNum = 3;
>        static const uint32_t dstNum = 4;
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 605d96d..c43172d 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in
>  
>  // 2D & 1D Array read
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  
>  // 3D & 2D Array read
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  
>  // 1D write
> @@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id);
>  int __gen_ocl_get_image_channel_data_type(uint surface_id);
>  int __gen_ocl_get_image_channel_order(uint surface_id);
>  int __gen_ocl_get_image_depth(uint surface_id);
> +/* The printf function. */
> +int __gen_ocl_printf_stub(const char * format, ...);
> +#define printf __gen_ocl_printf_stub
>  
>  // 2D 3D Image Common Macro
>  #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> @@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id);
>  
>  #define GET_IMAGE(cl_image, surface_id) \
>      uint surface_id = (uint)cl_image
> +INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(index, 0, array_size - 1);
> +}
>  
> -#define DECL_READ_IMAGE0(int_clamping_fix,          \
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(index, 0, array_size - 1);
> +}
> +
> +#define DECL_READ_IMAGE0(int_clamping_fix,                                   \
>                          image_type, type, suffix, coord_type, n)             \
>    INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
>                                                 const sampler_t sampler,      \
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai);                          \
>      if (int_clamping_fix &&                                                  \
>          ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&             \
>          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST))               \
>              return   __gen_ocl_read_image ##suffix(                          \
> -                        EXPEND_READ_COORD(surface_id, sampler, coord), 1);   \
> +                        EXPEND_READ_COORD(surface_id, sampler, coord));      \
>      return  __gen_ocl_read_image ##suffix(                                   \
> -                    EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\
> +                    EXPEND_READ_COORDF(surface_id, sampler, coord), 0);      \
>    }
>  
>  #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,         \
> @@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id);
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai)                         \
>      coord_type tmpCoord = coord;                                             \
>      if (float_coord_rounding_fix | int_clamping_fix) {                       \
>        if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)              \
> @@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>              } else                                                           \
>                intCoord = tmpCoord;                                           \
>              return   __gen_ocl_read_image ##suffix(                          \
> -                       EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\
> +                       EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
>         }                                                                     \
>        }                                                                      \
>      }                                                                        \
>      return  __gen_ocl_read_image ##suffix(                                   \
> -                        EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
> +                        EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
>    }
>  
>  #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n)   \
> @@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai)                           \
>      return __gen_ocl_read_image ##suffix(                                    \
> -           EXPEND_READ_COORD(surface_id,                                     \
> +           EXPEND_READ_COORDF(surface_id,                                    \
>                               CLK_NORMALIZED_COORDS_FALSE                     \
>                               | CLK_ADDRESS_NONE                              \
> -                             | CLK_FILTER_NEAREST, (float)coord), 0);               \
> +                             | CLK_FILTER_NEAREST, (float)coord), 0);        \
>    }
>  
>  #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
> @@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>    DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
>    DECL_WRITE_IMAGE(image_type, type, suffix, float)
>  
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
> -
> -#define OUT_OF_BOX(coord, surface, normalized)                   \
> -  (coord < 0 ||                                                  \
> -   ((normalized == 0)                                            \
> -     && (coord >= __gen_ocl_get_image_width(surface)))           \
> -   || ((normalized != 0) && (coord > 0x1p0)))
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
> @@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f)
>  DECL_IMAGE_INFO_COMMON(image1d_t)
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
>  #undef DECL_IMAGE
>  // End of 1D
> @@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t)
>    DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
>    DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
>  // 2D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
>                                                    dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
>  
> -#define OUT_OF_BOX(coord, surface, normalized)                   \
> -  (coord.s0 < 0 || coord.s1 < 0 ||                               \
> -   ((normalized == 0)                                            \
> -     && (coord.s0 >= __gen_ocl_get_image_width(surface)          \
> -         || coord.s1 >= __gen_ocl_get_image_height(surface)))    \
> -   || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0)))
> -
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
>      if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> @@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
>  DECL_IMAGE(0, image2d_t, float4, f, 2)
>  
>  // 1D Array
> +#undef GET_IMAGE_ARRAY_SIZE
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> +  coord_type ai = __gen_compute_array_index(coord.s1, image);
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord)                            \
> +  {                                                            \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> +      tmpCoord.s0 += -0x1p-9;                                  \
> +  }
> +
>  DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
>  DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
>  DECL_IMAGE(0, image1d_array_t, float4, f, 2)
> @@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>  }
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDI
> +#undef EXPEND_READ_COORDF
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
>  // End of 2D and 1D Array
>  
>  // 3D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
>                                                    dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
>                                                    dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
> -#define OUT_OF_BOX(coord, surface, normalized)                  \
> -  (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 ||              \
> -   ((normalized == 0)                                           \
> -     && (coord.s0 >= __gen_ocl_get_image_width(surface)         \
> -         || coord.s1 >= __gen_ocl_get_image_height(surface)     \
> -         || coord.s2 >= __gen_ocl_get_image_depth(surface)))    \
> -   || ((normalized != 0)                                        \
> -        &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1)))
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                             \
>    {                                                             \
> @@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>      if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20)              \
>        tmpCoord.s2 += -0x1p-9;                                   \
>    }
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>  
>  DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
>  DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
> @@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
>  DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
>  DECL_IMAGE(0, image3d_t, float4, f, 3)
>  
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
> +                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord)                             \
> +  {                                                             \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
> +      tmpCoord.s0 += -0x1p-9;                                   \
> +    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
> +      tmpCoord.s1 += -0x1p-9;                                   \
> +  }
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> +  coord_type ai = __gen_compute_array_index(coord.s2, image);
> +
>  // 2D Array
>  DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
>  DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
> @@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
>  }
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
>  // End of 3D and 2D Array
>  
>  #undef DECL_IMAGE
> @@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
>  #undef OVERLOADABLE
>  #undef INLINE
>  
> -/* The printf function. */
> -int __gen_ocl_printf_stub(const char * format, ...);
> -#define printf __gen_ocl_printf_stub
> -
>  #endif /* __GEN_OCL_STDLIB_H__ */
> diff --git a/src/cl_api.c b/src/cl_api.c
> index b17cc52..9e412f6 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context         ctx,
>      err = CL_INVALID_VALUE;
>      goto error;
>    }
> -  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
> +  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
> +               image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
> +               image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
> +               image_type != CL_MEM_OBJECT_IMAGE2D &&
>                 image_type != CL_MEM_OBJECT_IMAGE3D)) {
>      err = CL_INVALID_VALUE;
>      goto error;
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 1bc97ac..41281f2 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
>                          image->intel_fmt, image->image_type,
>                          image->w, image->h, image->depth,
>                          image->row_pitch, image->tiling);
> +    if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
> +                          image->intel_fmt, image->image_type,
> +                          image->w, image->h, image->depth,
> +                          image->row_pitch, image->tiling);
>    }
>    return CL_SUCCESS;
>  }
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index af8e90c..578b548 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id     device,
>      DECL_FIELD(IMAGE_SUPPORT, image_support)
>      DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
>      DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
> +    DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
>      DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
>      DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
>      DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index a5449a7..769bfd2 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -51,6 +51,7 @@ struct _cl_device_id {
>    cl_uint  max_read_image_args;
>    cl_uint  max_write_image_args;
>    size_t   image2d_max_width;
> +  size_t   image_max_array_size;
>    size_t   image2d_max_height;
>    size_t   image3d_max_width;
>    size_t   image3d_max_height;
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index b8bda5e..6d03123 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -41,6 +41,7 @@
>  .image_support = CL_TRUE,
>  .max_read_image_args = 128,
>  .max_write_image_args = 8,
> +.image_max_array_size = 2048,
>  .image2d_max_width = 8192,
>  .image2d_max_height = 8192,
>  .image3d_max_width = 8192,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 491993e..a7a0f59 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -540,7 +540,7 @@ static cl_mem
>  _cl_mem_new_image(cl_context ctx,
>                    cl_mem_flags flags,
>                    const cl_image_format *fmt,
> -                  const cl_mem_object_type image_type,
> +                  const cl_mem_object_type orig_image_type,
>                    size_t w,
>                    size_t h,
>                    size_t depth,
> @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
>  {
>    cl_int err = CL_SUCCESS;
>    cl_mem mem = NULL;
> +  cl_mem_object_type image_type = orig_image_type;
>    uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
>    size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
>    cl_image_tiling_t tiling = CL_NO_TILE;
> @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
>        image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
>      DO_IMAGE_ERROR;
>  
> -  if (image_type == CL_MEM_OBJECT_IMAGE1D ||
> -      image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
>      size_t min_pitch = bpp * w;
>      if (data && pitch == 0)
>        pitch = min_pitch;
> @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
>      else if (data && slice_pitch == 0)
>        slice_pitch = pitch;
>      if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
> -    if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
> +    if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
> @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
>  
>      depth = 1;
>    } else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
> +             image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
>               image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> +    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +      h = 1;
> +      tiling = CL_NO_TILE;
> +    } else if (cl_driver_get_ver(ctx->drv) != 6)
> +      tiling = cl_get_default_tiling();
> +
>      size_t min_pitch = bpp * w;
>      if (data && pitch == 0)
>        pitch = min_pitch;
> @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
>        slice_pitch = min_slice_pitch;
>      if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
>      if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
> -    if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
> +    if (image_type == CL_MEM_OBJECT_IMAGE3D &&
> +       (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
> +    else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
>  
> -    /* Pick up tiling mode (we do only linear on SNB) */
> -    if (cl_driver_get_ver(ctx->drv) != 6)
> -      tiling = cl_get_default_tiling();
>    } else
>      assert(0);
>  
> @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
>    /* Tiling requires to align both pitch and height */
>    if (tiling == CL_NO_TILE) {
>      aligned_pitch = w * bpp;
> -    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
> -        image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
> -        image_type == CL_MEM_OBJECT_IMAGE3D)
> -      aligned_h = ALIGN(h, valign);
> -    else
> -      aligned_h     = h;
> +    aligned_h  = ALIGN(h, valign);
>    } else if (tiling == CL_TILE_X) {
>      aligned_pitch = ALIGN(w * bpp, tilex_w);
>      aligned_h     = ALIGN(h, tilex_h);
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 197d388..c98a440 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -91,7 +91,7 @@ struct intel_gpgpu
>  
>    unsigned long img_bitmap;              /* image usage bitmap. */
>    unsigned int img_index_base;          /* base index for image surface.*/
> -  drm_intel_bo *binded_img[max_img_n];  /* all images binded for the call */
> +  drm_intel_bo *binded_img[max_img_n + 128];  /* all images binded for the call */
>  
>    unsigned long sampler_bitmap;          /* sampler usage bitmap. */
>  
> @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
>    memset(ss, 0, sizeof(*ss));
>  
>    ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
> -  ss->ss0.surface_type = intel_get_surface_type(type);
> +  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +    ss->ss0.surface_type = I965_SURFACE_2D;
> +  else
> +    ss->ss0.surface_type = intel_get_surface_type(type);
>    if (intel_is_surface_array(type)) {
>      ss->ss0.surface_array = 1;
>      ss->ss0.surface_array_spacing = 1;
> @@ -811,7 +814,10 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
>    memset(ss, 0, sizeof(*ss));
>  
>    ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
> -  ss->ss0.surface_type = intel_get_surface_type(type);
> +  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +    ss->ss0.surface_type = I965_SURFACE_2D;
> +  else
> +    ss->ss0.surface_type = intel_get_surface_type(type);
>    if (intel_is_surface_array(type)) {
>      ss->ss0.surface_array = 1;
>      ss->ss0.surface_array_spacing = 1;





More information about the Beignet mailing list