[Beignet] [PATCH v2 2/3] GBE/runtime: fixup broken 1d array image support.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Jun 19 00:45:40 PDT 2014


I forgot to change the gen75 bind image function, so please ignore this
version. I have already sent v4 of this patch.

On Thu, Jun 19, 2014 at 03:36:36PM +0800, Zhigang Gong wrote:
> As the sample LD message doesn't support an array index, we have
> to create a 2D array surface with the same buffer object.
> Thus one 1D array image will have two surfaces bound to it:
> one at the index and the second at 128 + index.
> 
> Then, on the kernel side, we access the corresponding
> 2D array surface when the LD message is required; otherwise
> we access the original 1D array surface.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   9 +-
>  backend/src/ir/instruction.cpp             |   2 +-
>  backend/src/ocl_stdlib.tmpl.h              | 161 +++++++++++++++++++----------
>  src/cl_api.c                               |   5 +-
>  src/cl_command_queue.c                     |   5 +
>  src/cl_device_id.c                         |   1 +
>  src/cl_device_id.h                         |   1 +
>  src/cl_gt_device.h                         |   1 +
>  src/cl_mem.c                               |  29 +++---
>  src/intel/intel_gpgpu.c                    |   7 +-
>  10 files changed, 149 insertions(+), 72 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index ecb64cd..986aa3e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3606,10 +3606,15 @@ namespace gbe
>            msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
>          msgLen = srcNum;
>        }
> -      uint32_t bti = insn.getImageIndex();
> +      // We switch to a fixup bti for linear filter on a image1d array sampling.
> +      uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
> +      if (bti > 253) {
> +        std::cerr << "Too large bti " << bti;
> +        return false;
> +      }
>        uint32_t sampler = insn.getSamplerIndex();
>  
> -      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset());
> +      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
>        return true;
>      }
>      DECL_CTOR(SampleInstruction, 1, 1);
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index d081235..435869e 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -527,7 +527,7 @@ namespace ir {
>        uint8_t srcIsFloat:1;
>        uint8_t dstIsFloat:1;
>        uint8_t samplerIdx:4;
> -      uint8_t samplerOffset:1;
> +      uint8_t samplerOffset:2;
>        uint8_t imageIdx;
>        static const uint32_t srcNum = 3;
>        static const uint32_t dstNum = 4;
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 605d96d..c43172d 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in
>  
>  // 2D & 1D Array read
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
>  
>  // 3D & 2D Array read
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
>  OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
>  
>  // 1D write
> @@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id);
>  int __gen_ocl_get_image_channel_data_type(uint surface_id);
>  int __gen_ocl_get_image_channel_order(uint surface_id);
>  int __gen_ocl_get_image_depth(uint surface_id);
> +/* The printf function. */
> +int __gen_ocl_printf_stub(const char * format, ...);
> +#define printf __gen_ocl_printf_stub
>  
>  // 2D 3D Image Common Macro
>  #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> @@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id);
>  
>  #define GET_IMAGE(cl_image, surface_id) \
>      uint surface_id = (uint)cl_image
> +INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(rint(index), 0.f, array_size - 1.f);
> +}
> +
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(index, 0, array_size - 1);
> +}
>  
> -#define DECL_READ_IMAGE0(int_clamping_fix,          \
> +INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  return clamp(index, 0, array_size - 1);
> +}
> +
> +#define DECL_READ_IMAGE0(int_clamping_fix,                                   \
>                          image_type, type, suffix, coord_type, n)             \
>    INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
>                                                 const sampler_t sampler,      \
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai);                          \
>      if (int_clamping_fix &&                                                  \
>          ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&             \
>          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST))               \
>              return   __gen_ocl_read_image ##suffix(                          \
> -                        EXPEND_READ_COORD(surface_id, sampler, coord), 1);   \
> +                        EXPEND_READ_COORD(surface_id, sampler, coord));      \
>      return  __gen_ocl_read_image ##suffix(                                   \
> -                    EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\
> +                    EXPEND_READ_COORDF(surface_id, sampler, coord), 0);      \
>    }
>  
>  #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,         \
> @@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id);
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai)                         \
>      coord_type tmpCoord = coord;                                             \
>      if (float_coord_rounding_fix | int_clamping_fix) {                       \
>        if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)              \
> @@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>              } else                                                           \
>                intCoord = tmpCoord;                                           \
>              return   __gen_ocl_read_image ##suffix(                          \
> -                       EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\
> +                       EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
>         }                                                                     \
>        }                                                                      \
>      }                                                                        \
>      return  __gen_ocl_read_image ##suffix(                                   \
> -                        EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
> +                        EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
>    }
>  
>  #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n)   \
> @@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>                                                 coord_type coord)             \
>    {                                                                          \
>      GET_IMAGE(cl_image, surface_id);                                         \
> +    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai)                           \
>      return __gen_ocl_read_image ##suffix(                                    \
> -           EXPEND_READ_COORD(surface_id,                                     \
> +           EXPEND_READ_COORDF(surface_id,                                    \
>                               CLK_NORMALIZED_COORDS_FALSE                     \
>                               | CLK_ADDRESS_NONE                              \
> -                             | CLK_FILTER_NEAREST, (float)coord), 0);               \
> +                             | CLK_FILTER_NEAREST, (float)coord), 0);        \
>    }
>  
>  #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
> @@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
>    DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
>    DECL_WRITE_IMAGE(image_type, type, suffix, float)
>  
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
> -
> -#define OUT_OF_BOX(coord, surface, normalized)                   \
> -  (coord < 0 ||                                                  \
> -   ((normalized == 0)                                            \
> -     && (coord >= __gen_ocl_get_image_width(surface)))           \
> -   || ((normalized != 0) && (coord > 0x1p0)))
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
> @@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f)
>  DECL_IMAGE_INFO_COMMON(image1d_t)
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
>  #undef DECL_IMAGE
>  // End of 1D
> @@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t)
>    DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
>    DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
>  // 2D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
>                                                    dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
>  
> -#define OUT_OF_BOX(coord, surface, normalized)                   \
> -  (coord.s0 < 0 || coord.s1 < 0 ||                               \
> -   ((normalized == 0)                                            \
> -     && (coord.s0 >= __gen_ocl_get_image_width(surface)          \
> -         || coord.s1 >= __gen_ocl_get_image_height(surface)))    \
> -   || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0)))
> -
>  #define FIXUP_FLOAT_COORD(tmpCoord)                            \
>    {                                                            \
>      if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> @@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
>  DECL_IMAGE(0, image2d_t, float4, f, 2)
>  
>  // 1D Array
> +#undef GET_IMAGE_ARRAY_SIZE
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> +  coord_type ai = __gen_compute_array_index(coord.s1, image);
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord)                            \
> +  {                                                            \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> +      tmpCoord.s0 += -0x1p-9;                                  \
> +  }
> +
>  DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
>  DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
>  DECL_IMAGE(0, image1d_array_t, float4, f, 2)
> @@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>  }
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDI
> +#undef EXPEND_READ_COORDF
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
>  // End of 2D and 1D Array
>  
>  // 3D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
> -#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
>  #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
>                                                    dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
>                                                    dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
>  #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
> -#define OUT_OF_BOX(coord, surface, normalized)                  \
> -  (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 ||              \
> -   ((normalized == 0)                                           \
> -     && (coord.s0 >= __gen_ocl_get_image_width(surface)         \
> -         || coord.s1 >= __gen_ocl_get_image_height(surface)     \
> -         || coord.s2 >= __gen_ocl_get_image_depth(surface)))    \
> -   || ((normalized != 0)                                        \
> -        &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1)))
>  
>  #define FIXUP_FLOAT_COORD(tmpCoord)                             \
>    {                                                             \
> @@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>      if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20)              \
>        tmpCoord.s2 += -0x1p-9;                                   \
>    }
> +#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
>  
>  DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
>  DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
> @@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
>  DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
>  DECL_IMAGE(0, image3d_t, float4, f, 3)
>  
> +#undef EXPEND_READ_COORD
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
> +#undef DENORMALIZE_COORD
> +#undef EXPEND_WRITE_COORD
> +#undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
> +
> +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
> +#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
> +#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
> +                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
> +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
> +                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
> +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
> +
> +#define FIXUP_FLOAT_COORD(tmpCoord)                             \
> +  {                                                             \
> +    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
> +      tmpCoord.s0 += -0x1p-9;                                   \
> +    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
> +      tmpCoord.s1 += -0x1p-9;                                   \
> +  }
> +#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> +  coord_type ai = __gen_compute_array_index(coord.s2, image);
> +
>  // 2D Array
>  DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
>  DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
> @@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
>  }
>  
>  #undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORD1
> +#undef EXPEND_READ_COORDF
> +#undef EXPEND_READ_COORDI
>  #undef DENORMALIZE_COORD
>  #undef EXPEND_WRITE_COORD
> -#undef OUT_OF_BOX
>  #undef FIXUP_FLOAT_COORD
> +#undef GET_IMAGE_ARRAY_SIZE
>  // End of 3D and 2D Array
>  
>  #undef DECL_IMAGE
> @@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
>  #undef OVERLOADABLE
>  #undef INLINE
>  
> -/* The printf function. */
> -int __gen_ocl_printf_stub(const char * format, ...);
> -#define printf __gen_ocl_printf_stub
> -
>  #endif /* __GEN_OCL_STDLIB_H__ */
> diff --git a/src/cl_api.c b/src/cl_api.c
> index b17cc52..9e412f6 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context         ctx,
>      err = CL_INVALID_VALUE;
>      goto error;
>    }
> -  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
> +  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
> +               image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
> +               image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
> +               image_type != CL_MEM_OBJECT_IMAGE2D &&
>                 image_type != CL_MEM_OBJECT_IMAGE3D)) {
>      err = CL_INVALID_VALUE;
>      goto error;
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 1bc97ac..41281f2 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
>                          image->intel_fmt, image->image_type,
>                          image->w, image->h, image->depth,
>                          image->row_pitch, image->tiling);
> +    if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
> +                          image->intel_fmt, image->image_type,
> +                          image->w, image->h, image->depth,
> +                          image->row_pitch, image->tiling);
>    }
>    return CL_SUCCESS;
>  }
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index af8e90c..578b548 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id     device,
>      DECL_FIELD(IMAGE_SUPPORT, image_support)
>      DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
>      DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
> +    DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
>      DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
>      DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
>      DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index a5449a7..769bfd2 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -51,6 +51,7 @@ struct _cl_device_id {
>    cl_uint  max_read_image_args;
>    cl_uint  max_write_image_args;
>    size_t   image2d_max_width;
> +  size_t   image_max_array_size;
>    size_t   image2d_max_height;
>    size_t   image3d_max_width;
>    size_t   image3d_max_height;
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index b8bda5e..6d03123 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -41,6 +41,7 @@
>  .image_support = CL_TRUE,
>  .max_read_image_args = 128,
>  .max_write_image_args = 8,
> +.image_max_array_size = 2048,
>  .image2d_max_width = 8192,
>  .image2d_max_height = 8192,
>  .image3d_max_width = 8192,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 491993e..a7a0f59 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -540,7 +540,7 @@ static cl_mem
>  _cl_mem_new_image(cl_context ctx,
>                    cl_mem_flags flags,
>                    const cl_image_format *fmt,
> -                  const cl_mem_object_type image_type,
> +                  const cl_mem_object_type orig_image_type,
>                    size_t w,
>                    size_t h,
>                    size_t depth,
> @@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
>  {
>    cl_int err = CL_SUCCESS;
>    cl_mem mem = NULL;
> +  cl_mem_object_type image_type = orig_image_type;
>    uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
>    size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
>    cl_image_tiling_t tiling = CL_NO_TILE;
> @@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
>        image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
>      DO_IMAGE_ERROR;
>  
> -  if (image_type == CL_MEM_OBJECT_IMAGE1D ||
> -      image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
>      size_t min_pitch = bpp * w;
>      if (data && pitch == 0)
>        pitch = min_pitch;
> @@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
>      else if (data && slice_pitch == 0)
>        slice_pitch = pitch;
>      if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
> -    if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
> +    if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
> @@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
>  
>      depth = 1;
>    } else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
> +             image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
>               image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
> +    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +      h = 1;
> +      tiling = CL_NO_TILE;
> +    } else if (cl_driver_get_ver(ctx->drv) != 6)
> +      tiling = cl_get_default_tiling();
> +
>      size_t min_pitch = bpp * w;
>      if (data && pitch == 0)
>        pitch = min_pitch;
> @@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
>        slice_pitch = min_slice_pitch;
>      if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
>      if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
> -    if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
> +    if (image_type == CL_MEM_OBJECT_IMAGE3D &&
> +       (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
> +    else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
>      if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
>  
> -    /* Pick up tiling mode (we do only linear on SNB) */
> -    if (cl_driver_get_ver(ctx->drv) != 6)
> -      tiling = cl_get_default_tiling();
>    } else
>      assert(0);
>  
> @@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
>    /* Tiling requires to align both pitch and height */
>    if (tiling == CL_NO_TILE) {
>      aligned_pitch = w * bpp;
> -    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
> -        image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
> -        image_type == CL_MEM_OBJECT_IMAGE3D)
> -      aligned_h = ALIGN(h, valign);
> -    else
> -      aligned_h     = h;
> +    aligned_h  = ALIGN(h, valign);
>    } else if (tiling == CL_TILE_X) {
>      aligned_pitch = ALIGN(w * bpp, tilex_w);
>      aligned_h     = ALIGN(h, tilex_h);
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 197d388..ab4cb0d 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -91,7 +91,7 @@ struct intel_gpgpu
>  
>    unsigned long img_bitmap;              /* image usage bitmap. */
>    unsigned int img_index_base;          /* base index for image surface.*/
> -  drm_intel_bo *binded_img[max_img_n];  /* all images binded for the call */
> +  drm_intel_bo *binded_img[max_img_n + 128];  /* all images binded for the call */
>  
>    unsigned long sampler_bitmap;          /* sampler usage bitmap. */
>  
> @@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
>    memset(ss, 0, sizeof(*ss));
>  
>    ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
> -  ss->ss0.surface_type = intel_get_surface_type(type);
> +  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +    ss->ss0.surface_type = I965_SURFACE_2D;
> +  else
> +    ss->ss0.surface_type = intel_get_surface_type(type);
>    if (intel_is_surface_array(type)) {
>      ss->ss0.surface_array = 1;
>      ss->ss0.surface_array_spacing = 1;
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list