[Beignet] [PATCH] Refactor all image builtin functions.

Yang, Rong R rong.r.yang at intel.com
Wed Dec 17 23:39:13 PST 2014


This patch LGTM, thanks.

> -----Original Message-----
> From: Gong, Zhigang
> Sent: Wednesday, December 17, 2014 09:42
> To: beignet at lists.freedesktop.org
> Cc: Yang, Rong R; Gong, Zhigang
> Subject: [PATCH] Refactor all image builtin functions.
> 
> Refactor almost all the image builtin related functions to simplify the code
> and get rid of most of the awful macros.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/libocl/src/ocl_image.cl        | 811 ++++++++++++++++++---------
> --
>  backend/src/llvm/llvm_gen_backend.cpp      | 174 ++++---
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  36 +-
>  backend/src/llvm/llvm_scalarize.cpp        |  13 +-
>  4 files changed, 618 insertions(+), 416 deletions(-)
> 
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index fd421bf..95b98ff 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -20,29 +20,90 @@
>  #include "ocl_integer.h"
>  #include "ocl_common.h"
> 
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Beignet builtin functions.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +
>  // 1D read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        float u, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        int u, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          float u, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          int u, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          float u, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          int u, uint sampler_offset);
> 
>  // 2D & 1D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        float2 coord, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        int2 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          float2 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          int2 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          float2 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          int2 coord, uint sampler_offset);
> 
>  // 3D & 2D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        float4 coord, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> +                                        int4 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          float4 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> +                                          int4 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          float4 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> +                                          int4 coord, uint sampler_offset);
> +
> +// Don't know why we need to support 3 component coordinates, but it's in
> the old
> +// version, so let's keep supporting it.
> +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> +                                               float3 coord, uint sampler_offset)
> +{
> +   return __gen_ocl_read_imagei(surface_id, sampler,
> +            (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> +                                               int3 coord, uint sampler_offset)
> +{
> +  return __gen_ocl_read_imagei(surface_id, sampler,
> +           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> +                                                 float3 coord, uint sampler_offset)
> +{
> +  return __gen_ocl_read_imageui(surface_id, sampler,
> +           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> +                                                 int3 coord, uint sampler_offset)
> +{
> +  return __gen_ocl_read_imageui(surface_id, sampler,
> +           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> +                             float3 coord, uint sampler_offset)
> +{
> +  return __gen_ocl_read_imagef(surface_id, sampler,
> +           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> +                                                 int3 coord, uint sampler_offset)
> +{
> +  return __gen_ocl_read_imagef(surface_id, sampler,
> +           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> 
>  // 1D write
>  OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4
> color);
> @@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint
> surface_id, int u, uint4 color);
>  OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4
> color);
> 
>  // 2D & 1D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v,
> int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v,
> uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v,
> float4 color);
> +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord,
> int4 color);
> +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord,
> uint4 color);
> +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord,
> float4 color);
> 
>  // 3D & 2D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v,
> int w, int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v,
> int w, uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v,
> int w, float4 color);
> +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord,
> int4 color);
> +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord,
> uint4 color);
> +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord,
> float4 color);
> +
> +INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3
> coord, int4 color)
> +{
> +  __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0),
> color);
> +}
> +INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id,
> int3 coord, uint4 color)
> +{
> +  __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2,
> 0), color);
> +}
> +INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3
> coord, float4 color)
> +{
> +  __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2,
> 0), color);
> +}
> 
>  int __gen_ocl_get_image_width(uint surface_id);
>  int __gen_ocl_get_image_height(uint surface_id);
> @@ -65,225 +139,436 @@ int
> __gen_ocl_get_image_channel_data_type(uint surface_id);
>  int __gen_ocl_get_image_channel_order(uint surface_id);
>  int __gen_ocl_get_image_depth(uint surface_id);
> 
> -// 2D 3D Image Common Macro
> -#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> -#define GEN_FIX_1 1
> -#else
> -#define GEN_FIX_1 0
> -#endif
> 
>  #define GET_IMAGE(cl_image, surface_id) \
>      uint surface_id = (uint)cl_image
> -OVERLOADABLE float __gen_compute_array_index(const float index,
> image1d_array_t image)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// helper functions to validate array index.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord,
> image1d_array_t image)
>  {
>    GET_IMAGE(image, surface_id);
>    float array_size = __gen_ocl_get_image_depth(surface_id);
> -  return clamp(rint(index), 0.f, array_size - 1.f);
> +  coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
> +  return coord;
>  }
> 
> -OVERLOADABLE float __gen_compute_array_index(float index,
> image2d_array_t image)
> +INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord,
> image2d_array_t image)
>  {
>    GET_IMAGE(image, surface_id);
>    float array_size = __gen_ocl_get_image_depth(surface_id);
> -  return clamp(rint(index), 0.f, array_size - 1.f);
> +  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
> +  return coord;
>  }
> 
> -OVERLOADABLE int __gen_compute_array_index(int index,
> image1d_array_t image)
> +INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord,
> image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
> +  return coord;
> +}
> +
> +INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord,
> image1d_array_t image)
>  {
>    GET_IMAGE(image, surface_id);
>    int array_size = __gen_ocl_get_image_depth(surface_id);
> -  return clamp(index, 0, array_size - 1);
> +  coord.s1 = clamp(coord.s1, 0, array_size - 1);
> +  return coord;
>  }
> 
> -OVERLOADABLE int __gen_compute_array_index(int index,
> image2d_array_t image)
> +INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord,
> image2d_array_t image)
>  {
>    GET_IMAGE(image, surface_id);
>    int array_size = __gen_ocl_get_image_depth(surface_id);
> -  return clamp(index, 0, array_size - 1);
> -}
> -
> -#define DECL_READ_IMAGE0(int_clamping_fix,                                   \
> -                        image_type, type, suffix, coord_type, n)             \
> -  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
> -                                               const sampler_t sampler,      \
> -                                               coord_type coord)             \
> -  {                                                                          \
> -    GET_IMAGE(cl_image, surface_id);                                         \
> -    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai);                          \
> -    if (int_clamping_fix &&                                                  \
> -        ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
> \
> -        ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST))               \
> -            return   __gen_ocl_read_image ##suffix(                          \
> -                        EXPEND_READ_COORD(surface_id, sampler, coord));      \
> -    return  __gen_ocl_read_image ##suffix(                                   \
> -                    EXPEND_READ_COORDF(surface_id, sampler, coord), 0);      \
> -  }
> +  coord.s2 = clamp(coord.s2, 0, array_size - 1);
> +  return coord;
> +}
> 
> -#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,
> \
> -                        image_type, type, suffix, coord_type, n)             \
> -  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
> -                                               const sampler_t sampler,      \
> -                                               coord_type coord)             \
> -  {                                                                          \
> -    GET_IMAGE(cl_image, surface_id);                                         \
> -    GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai)                         \
> -    coord_type tmpCoord = coord;                                             \
> -    if (float_coord_rounding_fix | int_clamping_fix) {                       \
> -      if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> \
> -          && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {        \
> -        if (float_coord_rounding_fix                                         \
> -            && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) {              \
> -          FIXUP_FLOAT_COORD(tmpCoord);                                       \
> -        }                                                                    \
> -        if (int_clamping_fix) {                                              \
> -            coord_type intCoord;                                             \
> -            if (sampler & CLK_NORMALIZED_COORDS_TRUE) {                      \
> -              DENORMALIZE_COORD(surface_id, intCoord, tmpCoord);             \
> -            } else                                                           \
> -              intCoord = tmpCoord;                                           \
> -            return   __gen_ocl_read_image ##suffix(                          \
> -                       EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
> -       }                                                                     \
> -      }                                                                      \
> -    }                                                                        \
> -    return  __gen_ocl_read_image ##suffix(                                   \
> -                        EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
> -  }
> +INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord,
> image2d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  coord.s2 = clamp(coord.s2, 0, array_size - 1);
> +  return coord;
> +}
> +
> +// For non array image type, we need to do nothing.
> +#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
> +INLINE_OVERLOADABLE coord_type
> __gen_validate_array_index(coord_type coord, image_type image) \
> +{ \
> +  return coord; \
> +}
> +
> +GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
> +GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Helper functions to work around some coordinate boundary issues.
> +// The major issue on Gen7/Gen7.5 is that the sample message could not
> sample
> +// integer type surfaces correctly with CLK_ADDRESS_CLAMP and
> CLK_FILTER_NEAREST.
> +// The workaround is to use an LD message instead of the normal sample
> message.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +bool __gen_sampler_need_fix(const sampler_t sampler)
> +{
> +  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> +          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> +}
> +
> +bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
> +{
> +  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> +}
> +
> +
> +INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
> +{
> +  if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
> +    tmpCoord += -0x1p-9f;
> +  return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord)
> +{
> +  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> +    tmpCoord.s0 += -0x1p-9f;
> +  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> +    tmpCoord.s1 += -0x1p-9f;
> +  return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord)
> +{
> +  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> +    tmpCoord.s0 += -0x1p-9f;
> +  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> +    tmpCoord.s1 += -0x1p-9f;
> +  if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
> +    tmpCoord.s2 += -0x1p-9f;
> +  return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
> +{
> +  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> +    tmpCoord.s0 += -0x1p-9f;
> +  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> +    tmpCoord.s1 += -0x1p-9f;
> +  if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
> +    tmpCoord.s2 += -0x1p-9f;
> +  return tmpCoord;
> +}
> +
> +// Functions to denormalize coordinates; this is needed when we need to use
> LD
> +// message (sampler offset is non-zero) and the coordinates are normalized
> +// coordinates.
> +INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t
> image, float srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  return srcCoord * __gen_ocl_get_image_width(surface_id);
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const
> image1d_array_t image, float2 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_denormalize_coord(const
> image1d_buffer_t image, float srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  return srcCoord * __gen_ocl_get_image_width(surface_id);
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t
> image, float2 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const
> image2d_array_t image, float3 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t
> image, float3 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> +  return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const
> image2d_array_t image, float4 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t
> image, float4 srcCoord)
> +{
> +  GET_IMAGE(image, surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> +  return srcCoord;
> +}
> +
> +// After denormalizing, we have to fix up the negative boundary.
> +INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord)
> +{
> +  return coord < 0 ? -1 : coord;
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord)
> +{
> +  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> +  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> +  return coord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord)
> +{
> +  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> +  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> +  coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
> +  return coord;
> +}
> 
> -#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix,
> coord_type, n)   \
> -  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
> -                                               coord_type coord)             \
> -  {                                                                          \
> -    GET_IMAGE(cl_image, surface_id);                                         \
> -    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai)                           \
> -    return __gen_ocl_read_image ##suffix(                                    \
> -           EXPEND_READ_COORDF(surface_id,                                    \
> -                             CLK_NORMALIZED_COORDS_FALSE                     \
> -                             | CLK_ADDRESS_NONE                              \
> -                             | CLK_FILTER_NEAREST, (float)coord), 0);        \
> +INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
> +{
> +  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> +  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> +  coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
> +  return coord;
> +}
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Built-in Image Read/Write Functions
> +/////////////////////////////////////////////////////////////////////////////
> //
> +
> +// 2D 3D Image Common Macro
> +#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> +#define GEN_FIX_FLOAT_ROUNDING 1
> +#define GEN_FIX_INT_CLAMPING 1
> +#else
> +#define GEN_FIX_FLOAT_ROUNDING 0
> +#define GEN_FIX_INT_CLAMPING 0
> +#endif
> +
> +// For integer coordinates
> +#define DECL_READ_IMAGE0(int_clamping_fix, image_type,                        \
> +                         image_data_type, suffix, coord_type)                 \
> +  OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image,       \
> +                                        const sampler_t sampler,              \
> +                                        coord_type coord)                     \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord = __gen_validate_array_index(coord, cl_image);                      \
> +    if (int_clamping_fix && __gen_sampler_need_fix(sampler))                  \
> +      return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1);
> \
> +    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
>    }
> 
> -#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
> -  OVERLOADABLE void write_image ##suffix(image_type cl_image,
> coord_type coord, type color)\
> -  {\
> -    GET_IMAGE(cl_image, surface_id);\
> -    __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id,
> coord, color));\
> +// For float coordinates
> +#define DECL_READ_IMAGE1(int_clamping_fix, image_type,                        \
> +                         image_data_type, suffix, coord_type)                 \
> +  OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image,       \
> +                                        const sampler_t sampler,              \
> +                                        coord_type coord)                     \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
> +    if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
> +      if (__gen_sampler_need_fix(sampler)) {                                  \
> +        if (GEN_FIX_FLOAT_ROUNDING &&                                         \
> +            __gen_sampler_need_rounding_fix(sampler))                         \
> +          tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
> +        if (int_clamping_fix) {                                               \
> +            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> +              tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
> +            tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
> +            return __gen_ocl_read_image ##suffix(                             \
> +                     surface_id, sampler, tmpCoord, 1);                       \
> +        }                                                                     \
> +      }                                                                       \
> +    }                                                                         \
> +    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0);  \
>    }
> 
> -#define DECL_IMAGE_INFO_COMMON(image_type)    \
> -  OVERLOADABLE  int get_image_channel_data_type(image_type image)\
> -  { \
> -    GET_IMAGE(image, surface_id);\
> -    return __gen_ocl_get_image_channel_data_type(surface_id); \
> -  }\
> -  OVERLOADABLE  int get_image_channel_order(image_type image)\
> -  { \
> -    GET_IMAGE(image, surface_id);\
> -    return __gen_ocl_get_image_channel_order(surface_id); \
> -  } \
> -  OVERLOADABLE int get_image_width(image_type image) \
> -  { \
> -    GET_IMAGE(image, surface_id); \
> -    return __gen_ocl_get_image_width(surface_id);  \
> +#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type,
> \
> +                                  suffix, coord_type)                         \
> +  OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image,       \
> +                                               coord_type coord)              \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord = __gen_validate_array_index(coord, cl_image);                      \
> +    return __gen_ocl_read_image ##suffix(                                     \
> +             surface_id, CLK_NORMALIZED_COORDS_FALSE |
> CLK_ADDRESS_NONE       \
> +             | CLK_FILTER_NEAREST, coord, 0);                                 \
>    }
> 
> -// 1D
> -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix)
> \
> -  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1)
> \
> -  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type,
> suffix, float, 1)  \
> -  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1)
> \
> -  DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
> -  DECL_WRITE_IMAGE(image_type, type, suffix, float)
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord
> < 0 ? -1 : coord), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord =
> srcCoord * __gen_ocl_get_image_width(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
> -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord)                            \
> -  {                                                            \
> -    if (tmpCoord < 0 && tmpCoord > -0x1p-20f)                  \
> -      tmpCoord += -0x1p-9f;                                     \
> +#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> coord_type)     \
> +  OVERLOADABLE void write_image ##suffix(image_type cl_image,                 \
> +                                         coord_type coord,                    \
> +                                         image_data_type color)               \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord_type fixedCoord = __gen_validate_array_index(coord, cl_image);
> \
> +    __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color);            \
>    }
> 
> -DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
> -DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
> -DECL_IMAGE(0, image1d_t, float4, f)
> -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i)
> -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui)
> -DECL_IMAGE(0, image1d_buffer_t, float4, f)
> +#define int1 int
> +#define float1 float
> 
> -// 1D Info
> -DECL_IMAGE_INFO_COMMON(image1d_t)
> -DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
> 
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef DECL_IMAGE
> -// End of 1D
> -
> -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n)
> \
> -  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n)
> \
> -  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type,
> suffix, float ##n, n) \
> -  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n)
> \
> -  DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
> -  DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
> -// 2D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler,
> (int)(coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> -                                                  dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord)                            \
> -  {                                                            \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> -      tmpCoord.s0 += -0x1p-9f;                                  \
> -    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)            \
> -      tmpCoord.s1 += -0x1p-9f;                                 \
> +#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type,
> suffix, n)  \
> +  DECL_READ_IMAGE0(int_clamping_fix, image_type,                              \
> +                   image_data_type, suffix, int ##n)                          \
> +  DECL_READ_IMAGE1(int_clamping_fix, image_type,                              \
> +                   image_data_type, suffix, float ##n)                        \
> +  DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix,
> int ##n)     \
> +  DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n)
> \
> +
> +// 1D
> +#define DECL_IMAGE_TYPE(image_type, n)                                        \
> +  DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n)                    \
> +  DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n)
> \
> +  DECL_IMAGE(0, image_type, float4, f, n)
> +
> +DECL_IMAGE_TYPE(image1d_t, 1)
> +DECL_IMAGE_TYPE(image1d_buffer_t, 1)
> +DECL_IMAGE_TYPE(image2d_t, 2)
> +DECL_IMAGE_TYPE(image3d_t, 4)
> +DECL_IMAGE_TYPE(image3d_t, 3)
> +DECL_IMAGE_TYPE(image2d_array_t, 4)
> +DECL_IMAGE_TYPE(image2d_array_t, 3)
> +
> +// For 1D Array:
> +// fixup_1darray_coord functions are to convert 1d array coord to 2d array
> coord
> +// and the caller must set the sampler offset to 2 when using this converted
> coord.
> +// It is used to work around an image 1d array restriction which could not
> set
> +// ai in the LD message. We solve it by faking the same image as a 2D array,
> and
> +// then access it by LD message as a 3D surface, treating the ai as the w
> coordinate.
> +INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord,
> image1d_array_t image)
> +{
> +  float4 newCoord;
> +  newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> +  newCoord.s1 = 0;
> +  newCoord.s2 = coord.s1;
> +  newCoord.s3 = 0;
> +  return newCoord;
> +}
> +
> +INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord,
> image1d_array_t image)
> +{
> +  int4 newCoord;
> +  newCoord.s0 = coord.s0;
> +  newCoord.s1 = 0;
> +  newCoord.s2 = coord.s1;
> +  newCoord.s3 = 0;
> +  return newCoord;
> +}
> +
> +// For integer coordinates
> +#define DECL_READ_IMAGE0_1DArray(int_clamping_fix,                            \
> +                                 image_data_type, suffix, coord_type)         \
> +  OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t
> cl_image,  \
> +                                        const sampler_t sampler,              \
> +                                        coord_type coord)                     \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord = __gen_validate_array_index(coord, cl_image);                      \
> +    if (int_clamping_fix && __gen_sampler_need_fix(sampler)) {                \
> +      int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image);             \
> +      return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord,
> 2); \
> +    }                                                                         \
> +    return  __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
>    }
> 
> -DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
> -DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
> -DECL_IMAGE(0, image2d_t, float4, f, 2)
> -
> -// 1D Array
> -#undef GET_IMAGE_ARRAY_SIZE
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> (int)0, ai, 2
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)ai
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler,
> (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0,
> __gen_compute_array_index(coord.s1, cl_image), color
> -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> -  coord_type ai = __gen_compute_array_index(coord.s1, image);
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord)                            \
> -  {                                                            \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
> -      tmpCoord.s0 += -0x1p-9f;                                  \
> +// For float coordinates
> +#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,
> \
> +                                 suffix, coord_type)                          \
> +  OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t
> cl_image,  \
> +                                        const sampler_t sampler,              \
> +                                        coord_type coord)                     \
> +  {                                                                           \
> +    GET_IMAGE(cl_image, surface_id);                                          \
> +    coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
> +    if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
> +      if (__gen_sampler_need_fix(sampler)) {                                  \
> +        if (GEN_FIX_FLOAT_ROUNDING &&                                         \
> +            __gen_sampler_need_rounding_fix(sampler))                         \
> +          tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
> +        if (int_clamping_fix) {                                               \
> +            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> +              tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
> +            float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> \
> +            return __gen_ocl_read_image ##suffix(                             \
> +                     surface_id, sampler, newCoord, 2);                       \
> +        }                                                                     \
> +      }                                                                       \
> +    }                                                                         \
> +    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0);  \
>    }
> 
> -DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
> -DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
> -DECL_IMAGE(0, image1d_array_t, float4, f, 2)
> +#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> \
> +  DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix,
> int2)   \
> +  DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,
> \
> +                           suffix, float2)                                    \
> +  DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type,
> suffix, int2)   \
> +  DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2)
> \
> +
> +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i)
> +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui)
> +DECL_IMAGE_1DArray(0, float4, f)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Built-in Image Query Functions
> +/////////////////////////////////////////////////////////////////////////////
> //
> +#define DECL_IMAGE_INFO_COMMON(image_type)                                    \
> +  OVERLOADABLE  int get_image_channel_data_type(image_type image)
> \
> +  {                                                                           \
> +    GET_IMAGE(image, surface_id);                                             \
> +    return __gen_ocl_get_image_channel_data_type(surface_id);                 \
> +  }                                                                           \
> +  OVERLOADABLE  int get_image_channel_order(image_type image)
> \
> +  {                                                                           \
> +    GET_IMAGE(image, surface_id);                                             \
> +    return __gen_ocl_get_image_channel_order(surface_id);                     \
> +  }                                                                           \
> +  OVERLOADABLE int get_image_width(image_type image)                          \
> +  {                                                                           \
> +    GET_IMAGE(image, surface_id);                                             \
> +    return __gen_ocl_get_image_width(surface_id);                             \
> +  }
> 
> -// 2D Info
> +DECL_IMAGE_INFO_COMMON(image1d_t)
> +DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
> +DECL_IMAGE_INFO_COMMON(image1d_array_t)
>  DECL_IMAGE_INFO_COMMON(image2d_t)
> +DECL_IMAGE_INFO_COMMON(image3d_t)
> +DECL_IMAGE_INFO_COMMON(image2d_array_t)
> +
> +// 2D extra Info
>  OVERLOADABLE int get_image_height(image2d_t image)
>  {
>    GET_IMAGE(image, surface_id);
> @@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t
> image)
>  {
>    return (int2){get_image_width(image), get_image_height(image)};
>  }
> +// End of 2D
> 
> -// 1D Array info
> -DECL_IMAGE_INFO_COMMON(image1d_array_t)
> -OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> -{
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_depth(surface_id);
> -}
> -
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDI
> -#undef EXPEND_READ_COORDF
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -// End of 2D and 1D Array
> -
> -// 3D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, coord.s2, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1, (float)coord.s2
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)
> (coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ?
> -1 : coord.s2), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> -                                                  dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id); \
> -                                                  dstCoord.z = srcCoord.z *
> __gen_ocl_get_image_depth(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> coord.s2, color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord)                             \
> -  {                                                             \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
> -      tmpCoord.s0 += -0x1p-9f;                                   \
> -    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
> -      tmpCoord.s1 += -0x1p-9f;                                   \
> -    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)              \
> -      tmpCoord.s2 += -0x1p-9f;                                   \
> -  }
> -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
> -
> -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
> -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
> -DECL_IMAGE(0, image3d_t, float4, f, 4)
> -
> -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
> -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
> -DECL_IMAGE(0, image3d_t, float4, f, 3)
> -
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, ai, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1, (float)ai
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)
> (coord.s0 < 0 ? -1 : coord.s0), \
> -                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> -                                                  dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> __gen_compute_array_index(coord.s2, cl_image), color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord)                             \
> -  {                                                             \
> -    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
> -      tmpCoord.s0 += -0x1p-9f;                                   \
> -    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
> -      tmpCoord.s1 += -0x1p-9f;                                   \
> -  }
> -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> -  coord_type ai = __gen_compute_array_index(coord.s2, image);
> -
> -// 2D Array
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
> -DECL_IMAGE(0, image2d_array_t, float4, f, 4)
> -
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3)
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3)
> -DECL_IMAGE(0, image2d_array_t, float4, f, 3)
> -
> -// 3D Info
> -DECL_IMAGE_INFO_COMMON(image3d_t)
> +// 3D extra Info
>  OVERLOADABLE int get_image_height(image3d_t image)
>  {
>    GET_IMAGE(image, surface_id);
> @@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t
> image)
>  }
>  OVERLOADABLE int4 get_image_dim(image3d_t image)
>  {
> -  return (int4){get_image_width(image), get_image_height(image),
> get_image_depth(image), 0};
> +  return (int4) (get_image_width(image),
> +                 get_image_height(image),
> +                 get_image_depth(image),
> +                 0);
>  }
> 
> -// 2D Array Info
> -DECL_IMAGE_INFO_COMMON(image2d_array_t)
> +// 2D Array extra Info
>  OVERLOADABLE int get_image_height(image2d_array_t image)
>  {
>    GET_IMAGE(image, surface_id);
> @@ -409,21 +615,10 @@ OVERLOADABLE size_t
> get_image_array_size(image2d_array_t image)
>    return __gen_ocl_get_image_depth(surface_id);
>  }
> 
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -// End of 3D and 2D Array
> -
> -#undef DECL_IMAGE
> -#undef DECL_READ_IMAGE
> -#undef DECL_READ_IMAGE_NOSAMPLER
> -#undef DECL_WRITE_IMAGE
> -#undef GEN_FIX_1
> -// End of Image
> -
> -
> -#undef GET_IMAGE
> +// 1D Array info
> +OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> +{
> +  GET_IMAGE(image, surface_id);
> +  return __gen_ocl_get_image_depth(surface_id);
> +}
> +// End of 1DArray
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index a438f09..afaa4a5 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -257,9 +257,10 @@ namespace gbe
>    /*! Get number of element to process dealing either with a vector or a
> scalar
>     *  value
>     */
> -  static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value
> *value, uint32_t &elemNum, bool useUnsigned = false)
> +  static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t
> &elemNum, bool useUnsigned = false)
>    {
>      ir::Type type;
> +    Type *llvmType = value->getType();
>      if (llvmType->isVectorTy() == true) {
>        VectorType *vectorType = cast<VectorType>(llvmType);
>        Type *elementType = vectorType->getElementType();
> @@ -629,6 +630,7 @@ namespace gbe
>      void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
> 
>      uint8_t appendSampler(CallSite::arg_iterator AI);
> +    uint8_t getImageID(CallInst &I);
> 
>      // These instructions are not supported at all
>      void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
> @@ -2526,8 +2528,8 @@ namespace gbe
>          Value *srcValue = I.getOperand(0);
>          Value *dstValue = &I;
>          uint32_t srcElemNum = 0, dstElemNum = 0 ;
> -        ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue,
> srcElemNum);
> -        ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue,
> dstElemNum);
> +        ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum);
> +        ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum);
>          // As long and double are not compatible in register storage
>          // and we do not support double yet, simply put an assert here
>          GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType ==
> ir::TYPE_DOUBLE));
> @@ -2927,7 +2929,7 @@ namespace gbe
>        {
>          // dst is a 4 elements vector. We allocate all 4 registers here.
>          uint32_t elemNum;
> -        (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
> +        (void)getVectorInfo(ctx, &I, elemNum);
>          GBE_ASSERT(elemNum == 4);
>          this->newRegister(&I);
>          break;
> @@ -3055,6 +3057,15 @@ namespace gbe
>      return index;
>    }
> 
> +  uint8_t GenWriter::getImageID(CallInst &I) {
> +    PtrOrigMapIter iter = pointerOrigMap.find(&I);
> +    GBE_ASSERT(iter != pointerOrigMap.end());
> +    SmallVectorImpl<Value *> &origins = iter->second;
> +    GBE_ASSERT(origins.size() == 1);
> +    const ir::Register imageReg = this->getRegister(origins[0]);
> +    return ctx.getFunction().getImageSet()->getIdx(imageReg);
> +  }
> +
>    void GenWriter::emitCallInst(CallInst &I) {
>      if (Function *F = I.getCalledFunction()) {
>        if (F->getIntrinsicID() != 0) {
> @@ -3218,7 +3229,6 @@ namespace gbe
>            default: NOT_IMPLEMENTED;
>          }
>        } else {
> -        int image_dim;
>          // Get the name of the called function and handle it
>          Value *Callee = I.getCalledValue();
>          const std::string fnName = Callee->getName();
> @@ -3334,13 +3344,13 @@ namespace gbe
>            case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
>            case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
>            {
> -            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> +            const uint8_t imageID = getImageID(I);
> +            GBE_ASSERT(AI != AE); ++AI;
>              const ir::Register reg = this->getRegister(&I, 0);
>              int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
> -            const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> -            ir::ImageInfoKey key(surfaceID, infoType);
> +            ir::ImageInfoKey key(imageID, infoType);
>              const ir::Register infoReg = ctx.getFunction().getImageSet()-
> >appendInfo(key, &ctx);
> -            ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
> +            ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
>              break;
>            }
> 
> @@ -3350,69 +3360,75 @@ namespace gbe
>            case GEN_OCL_READ_IMAGE_I_1D_I:
>            case GEN_OCL_READ_IMAGE_UI_1D_I:
>            case GEN_OCL_READ_IMAGE_F_1D_I:
> -            image_dim = 1;
> -            goto handle_read_image;
>            case GEN_OCL_READ_IMAGE_I_2D:
>            case GEN_OCL_READ_IMAGE_UI_2D:
>            case GEN_OCL_READ_IMAGE_F_2D:
>            case GEN_OCL_READ_IMAGE_I_2D_I:
>            case GEN_OCL_READ_IMAGE_UI_2D_I:
>            case GEN_OCL_READ_IMAGE_F_2D_I:
> -            image_dim = 2;
> -            goto handle_read_image;
>            case GEN_OCL_READ_IMAGE_I_3D:
>            case GEN_OCL_READ_IMAGE_UI_3D:
>            case GEN_OCL_READ_IMAGE_F_3D:
>            case GEN_OCL_READ_IMAGE_I_3D_I:
>            case GEN_OCL_READ_IMAGE_UI_3D_I:
>            case GEN_OCL_READ_IMAGE_F_3D_I:
> -            image_dim = 3;
> -handle_read_image:
>            {
> -            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> -            const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> +            const uint8_t imageID = getImageID(I);
> +            GBE_ASSERT(AI != AE); ++AI;
>              GBE_ASSERT(AI != AE);
>              const uint8_t sampler = this->appendSampler(AI);
> -            ++AI;
> -
> -            ir::Register ucoord;
> -            ir::Register vcoord;
> -            ir::Register wcoord;
> -
> -            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
> -            if (image_dim > 1) {
> -              GBE_ASSERT(AI != AE);
> -              vcoord = this->getRegister(*AI);
> -              ++AI;
> -            } else {
> -              vcoord = ir::ocl::invalid;
> -            }
> -
> -            if (image_dim > 2) {
> -              GBE_ASSERT(AI != AE);
> -              wcoord = this->getRegister(*AI);
> -              ++AI;
> -            } else {
> -              wcoord = ir::ocl::invalid;
> -            }
> +            ++AI; GBE_ASSERT(AI != AE);
> +            uint32_t coordNum;
> +            (void)getVectorInfo(ctx, *AI, coordNum);
> +            if (coordNum == 4)
> +              coordNum = 3;
> +            const uint32_t imageDim = coordNum;
> +            GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
> 
> -            vector<ir::Register> dstTupleData, srcTupleData;
> -            const uint32_t elemNum = 4;
> -            for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> -              const ir::Register reg = this->getRegister(&I, elemID);
> -              dstTupleData.push_back(reg);
> -            }
> -            srcTupleData.push_back(ucoord);
> -            srcTupleData.push_back(vcoord);
> -            srcTupleData.push_back(wcoord);
>              uint8_t samplerOffset = 0;
> +            Value *coordVal = *AI;
> +            ++AI; GBE_ASSERT(AI != AE);
> +            Value *samplerOffsetVal = *AI;
>  #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> -            GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
> +            Constant *CPV = dyn_cast<Constant>(samplerOffsetVal);
>              assert(CPV);
>              const ir::Immediate &x = processConstantImm(CPV);
>              GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> ir::TYPE_S32, "Invalid sampler type");
>              samplerOffset = x.getIntegerValue();
>  #endif
> +            bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> +            bool requiredFloatCoord = samplerOffset == 0;
> +
> +            vector<ir::Register> dstTupleData, srcTupleData;
> +            for (uint32_t elemID = 0; elemID < 3; elemID++) {
> +              ir::Register reg;
> +
> +              if (elemID < imageDim)
> +                reg = this->getRegister(coordVal, elemID);
> +              else
> +                reg = ir::ocl::invalid;
> +
> +              if (isFloatCoord == requiredFloatCoord)
> +                srcTupleData.push_back(reg);
> +              else if (!requiredFloatCoord) {
> +                ir::Register intCoordReg =
> ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> +                ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg);
> +                srcTupleData.push_back(intCoordReg);
> +              } else {
> +                ir::Register floatCoordReg =
> ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> +                ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg);
> +                srcTupleData.push_back(floatCoordReg);
> +              }
> +            }
> +
> +            uint32_t elemNum;
> +            (void)getVectorInfo(ctx, &I, elemNum);
> +            GBE_ASSERT(elemNum == 4);
> +
> +            for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> +              const ir::Register reg = this->getRegister(&I, elemID);
> +              dstTupleData.push_back(reg);
> +            }
>              const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
>              const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
> 
> @@ -3445,58 +3461,46 @@ handle_read_image:
>                  GBE_ASSERT(0); // never been here.
>              }
> 
> -            bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> -
> -            ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
> -                       isFloatCoord, sampler, samplerOffset);
> +            ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
> +                       requiredFloatCoord, sampler, samplerOffset);
>              break;
>            }
> 
>            case GEN_OCL_WRITE_IMAGE_I_1D:
>            case GEN_OCL_WRITE_IMAGE_UI_1D:
>            case GEN_OCL_WRITE_IMAGE_F_1D:
> -            image_dim = 1;
> -            goto handle_write_image;
>            case GEN_OCL_WRITE_IMAGE_I_2D:
>            case GEN_OCL_WRITE_IMAGE_UI_2D:
>            case GEN_OCL_WRITE_IMAGE_F_2D:
> -            image_dim = 2;
> -            goto handle_write_image;
>            case GEN_OCL_WRITE_IMAGE_I_3D:
>            case GEN_OCL_WRITE_IMAGE_UI_3D:
>            case GEN_OCL_WRITE_IMAGE_F_3D:
> -            image_dim = 3;
> -handle_write_image:
>            {
> -            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> -            const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> -            ir::Register ucoord, vcoord, wcoord;
> -
> -            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
> +            const uint8_t imageID = getImageID(I);
> +            GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
> +            uint32_t coordNum;
> +            (void)getVectorInfo(ctx, *AI, coordNum);
> +            if (coordNum == 4)
> +              coordNum = 3;
> +            const uint32_t imageDim = coordNum;
> +            vector<ir::Register> srcTupleData;
> +            GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
> 
> -            if (image_dim > 1) {
> -              GBE_ASSERT(AI != AE);
> -              vcoord = this->getRegister(*AI);
> -              ++AI;
> -            } else
> -              vcoord = ir::ocl::invalid;
> -
> -            if (image_dim > 2) {
> -              GBE_ASSERT(AI != AE);
> -              wcoord = this->getRegister(*AI);
> -              ++AI;
> -            } else {
> -              wcoord = ir::ocl::invalid;
> -            }
> +            for (uint32_t elemID = 0; elemID < 3; elemID++) {
> +              ir::Register reg;
> 
> -            GBE_ASSERT(AI != AE);
> -            vector<ir::Register> srcTupleData;
> +              if (elemID < imageDim)
> +                reg = this->getRegister(*AI, elemID);
> +              else
> +                reg = ir::ocl::invalid;
> 
> -            srcTupleData.push_back(ucoord);
> -            srcTupleData.push_back(vcoord);
> -            srcTupleData.push_back(wcoord);
> +              srcTupleData.push_back(reg);
> +            }
> +            ++AI; GBE_ASSERT(AI != AE);
> +            uint32_t elemNum;
> +            (void)getVectorInfo(ctx, *AI, elemNum);
> +            GBE_ASSERT(elemNum == 4);
> 
> -            const uint32_t elemNum = 4;
>              for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
>                const ir::Register reg = this->getRegister(*AI, elemID);
>                srcTupleData.push_back(reg);
> @@ -3523,7 +3527,7 @@ handle_write_image:
>                  GBE_ASSERT(0); // never been here.
>              }
> 
> -            ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
> +            ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
>              break;
>            }
>            case GEN_OCL_MUL_HI_INT:
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 7434c78..8d55c3f 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16,
> __gen_ocl_force_simd16)
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D,
> _Z21__gen_ocl_read_imageijtfj)
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D,
> _Z22__gen_ocl_read_imageuijtfj)
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D,
> _Z21__gen_ocl_read_imagefjtfj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D,
> _Z21__gen_ocl_read_imageijtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D,
> _Z22__gen_ocl_read_imageuijtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D,
> _Z21__gen_ocl_read_imagefjtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D,
> _Z21__gen_ocl_read_imageijtfffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D,
> _Z22__gen_ocl_read_imageuijtfffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D,
> _Z21__gen_ocl_read_imagefjtfffj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D,
> _Z21__gen_ocl_read_imageijtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D,
> _Z22__gen_ocl_read_imageuijtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D,
> _Z21__gen_ocl_read_imagefjtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D,
> _Z21__gen_ocl_read_imageijtDv4_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D,
> _Z22__gen_ocl_read_imageuijtDv4_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D,
> _Z21__gen_ocl_read_imagefjtDv4_fj)
>  // work around read image with the LD message. The coords are integer
> type.
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I,
> _Z21__gen_ocl_read_imageijtij)
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I,
> _Z22__gen_ocl_read_imageuijtij)
>  DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I,
> _Z21__gen_ocl_read_imagefjtij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I,
> _Z21__gen_ocl_read_imageijtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I,
> _Z22__gen_ocl_read_imageuijtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I,
> _Z21__gen_ocl_read_imagefjtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> _Z21__gen_ocl_read_imageijtiiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> _Z22__gen_ocl_read_imageuijtiiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> _Z21__gen_ocl_read_imagefjtiiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I,
> _Z21__gen_ocl_read_imageijtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I,
> _Z22__gen_ocl_read_imageuijtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I,
> _Z21__gen_ocl_read_imagefjtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> _Z21__gen_ocl_read_imageijtDv4_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> _Z22__gen_ocl_read_imageuijtDv4_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> _Z21__gen_ocl_read_imagefjtDv4_ij)
> 
>  // To write_image functions.
>  DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D,
> _Z22__gen_ocl_write_imageijiDv4_i)
>  DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D,
> _Z23__gen_ocl_write_imageuijiDv4_j)
>  DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D,
> _Z22__gen_ocl_write_imagefjiDv4_f)
> 
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D,
> _Z22__gen_ocl_write_imageijiiDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D,
> _Z23__gen_ocl_write_imageuijiiDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D,
> _Z22__gen_ocl_write_imagefjiiDv4_f)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D,
> _Z22__gen_ocl_write_imageijDv2_iDv4_i)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D,
> _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D,
> _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
> 
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D,
> _Z22__gen_ocl_write_imageijiiiDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D,
> _Z23__gen_ocl_write_imageuijiiiDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D,
> _Z22__gen_ocl_write_imagefjiiiDv4_f)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D,
> _Z22__gen_ocl_write_imageijDv4_iS_)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D,
> _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D,
> _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
> 
>  // To get image info function
>  DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH,
> __gen_ocl_get_image_width)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 5450a2b..baf526b 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -648,7 +648,7 @@ namespace gbe {
> 
>          // Get the function arguments
>          CallSite CS(call);
> -        CallSite::arg_iterator CI = CS.arg_begin() + 2;
> +        CallSite::arg_iterator CI = CS.arg_begin() + 1;
> 
>          switch (it->second) {
>            default: break;
> @@ -661,8 +661,7 @@ namespace gbe {
>            case GEN_OCL_READ_IMAGE_I_3D:
>            case GEN_OCL_READ_IMAGE_UI_3D:
>            case GEN_OCL_READ_IMAGE_F_3D:
> -
> -	  case GEN_OCL_READ_IMAGE_I_1D_I:
> +          case GEN_OCL_READ_IMAGE_I_1D_I:
>            case GEN_OCL_READ_IMAGE_UI_1D_I:
>            case GEN_OCL_READ_IMAGE_F_1D_I:
>            case GEN_OCL_READ_IMAGE_I_2D_I:
> @@ -674,6 +673,9 @@ namespace gbe {
>            case GEN_OCL_GET_IMAGE_WIDTH:
>            case GEN_OCL_GET_IMAGE_HEIGHT:
>            {
> +            ++CI;
> +            if ((*CI)->getType()->isVectorTy())
> +              *CI = InsertToVector(call, *CI);
>              setAppendPoint(call);
>              extractFromVector(call);
>              break;
> @@ -681,15 +683,16 @@ namespace gbe {
>            case GEN_OCL_WRITE_IMAGE_I_3D:
>            case GEN_OCL_WRITE_IMAGE_UI_3D:
>            case GEN_OCL_WRITE_IMAGE_F_3D:
> -            CI++;
>            case GEN_OCL_WRITE_IMAGE_I_2D:
>            case GEN_OCL_WRITE_IMAGE_UI_2D:
>            case GEN_OCL_WRITE_IMAGE_F_2D:
> -            CI++;
>            case GEN_OCL_WRITE_IMAGE_I_1D:
>            case GEN_OCL_WRITE_IMAGE_UI_1D:
>            case GEN_OCL_WRITE_IMAGE_F_1D:
>            {
> +            if ((*CI)->getType()->isVectorTy())
> +              *CI = InsertToVector(call, *CI);
> +            ++CI;
>              *CI = InsertToVector(call, *CI);
>              break;
>            }
> --
> 1.8.3.2



More information about the Beignet mailing list