[Beignet] [PATCH] Refactor all image builtin functions.

Zhigang Gong zhigang.gong at intel.com
Tue Dec 16 17:41:37 PST 2014


Refactor almost all the image builtin related functions to simplify the code
and get rid of most of the awful macros.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/libocl/src/ocl_image.cl        | 811 ++++++++++++++++++-----------
 backend/src/llvm/llvm_gen_backend.cpp      | 174 ++++---
 backend/src/llvm/llvm_gen_ocl_function.hxx |  36 +-
 backend/src/llvm/llvm_scalarize.cpp        |  13 +-
 4 files changed, 618 insertions(+), 416 deletions(-)

diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index fd421bf..95b98ff 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -20,29 +20,90 @@
 #include "ocl_integer.h"
 #include "ocl_common.h"
 
+///////////////////////////////////////////////////////////////////////////////
+// Beignet builtin functions.
+///////////////////////////////////////////////////////////////////////////////
+
 // 1D read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        float u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        int u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          float u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          int u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          float u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          int u, uint sampler_offset);
 
 // 2D & 1D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        float2 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        int2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          float2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          int2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          float2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          int2 coord, uint sampler_offset);
 
 // 3D & 2D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        float4 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                        int4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          float4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                          int4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          float4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                          int4 coord, uint sampler_offset);
+
+// It is unclear why 3-component coordinates need to be supported, but the old
+// version accepted them, so keep supporting them by widening to 4 components.
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                               float3 coord, uint sampler_offset)
+{
+   return __gen_ocl_read_imagei(surface_id, sampler,
+            (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+                                               int3 coord, uint sampler_offset)
+{
+  return __gen_ocl_read_imagei(surface_id, sampler,
+           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                                 float3 coord, uint sampler_offset)
+{
+  return __gen_ocl_read_imageui(surface_id, sampler,
+           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+                                                 int3 coord, uint sampler_offset)
+{
+  return __gen_ocl_read_imageui(surface_id, sampler,
+           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                             float3 coord, uint sampler_offset)
+{
+  return __gen_ocl_read_imagef(surface_id, sampler,
+           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+                                                 int3 coord, uint sampler_offset)
+{
+  return __gen_ocl_read_imagef(surface_id, sampler,
+           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
 
 // 1D write
 OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
@@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
 OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
 
 // 2D & 1D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color);
 
 // 3D & 2D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color);
+
+INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color)
+{
+  __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color)
+{
+  __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color)
+{
+  __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
 
 int __gen_ocl_get_image_width(uint surface_id);
 int __gen_ocl_get_image_height(uint surface_id);
@@ -65,225 +139,436 @@ int __gen_ocl_get_image_channel_data_type(uint surface_id);
 int __gen_ocl_get_image_channel_order(uint surface_id);
 int __gen_ocl_get_image_depth(uint surface_id);
 
-// 2D 3D Image Common Macro
-#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
-#define GEN_FIX_1 1
-#else
-#define GEN_FIX_1 0
-#endif
 
 #define GET_IMAGE(cl_image, surface_id) \
     uint surface_id = (uint)cl_image
-OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
+
+///////////////////////////////////////////////////////////////////////////////
+// helper functions to validate array index.
+///////////////////////////////////////////////////////////////////////////////
+INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
 {
   GET_IMAGE(image, surface_id);
   float array_size = __gen_ocl_get_image_depth(surface_id);
-  return clamp(rint(index), 0.f, array_size - 1.f);
+  coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
+  return coord;
 }
 
-OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
+INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
 {
   GET_IMAGE(image, surface_id);
   float array_size = __gen_ocl_get_image_depth(surface_id);
-  return clamp(rint(index), 0.f, array_size - 1.f);
+  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+  return coord;
 }
 
-OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
+INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  float array_size = __gen_ocl_get_image_depth(surface_id);
+  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+  return coord;
+}
+
+INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
 {
   GET_IMAGE(image, surface_id);
   int array_size = __gen_ocl_get_image_depth(surface_id);
-  return clamp(index, 0, array_size - 1);
+  coord.s1 = clamp(coord.s1, 0, array_size - 1);
+  return coord;
 }
 
-OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
+INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
 {
   GET_IMAGE(image, surface_id);
   int array_size = __gen_ocl_get_image_depth(surface_id);
-  return clamp(index, 0, array_size - 1);
-}
-
-#define DECL_READ_IMAGE0(int_clamping_fix,                                   \
-                        image_type, type, suffix, coord_type, n)             \
-  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
-                                               const sampler_t sampler,      \
-                                               coord_type coord)             \
-  {                                                                          \
-    GET_IMAGE(cl_image, surface_id);                                         \
-    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai);                          \
-    if (int_clamping_fix &&                                                  \
-        ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&             \
-        ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST))               \
-            return   __gen_ocl_read_image ##suffix(                          \
-                        EXPEND_READ_COORD(surface_id, sampler, coord));      \
-    return  __gen_ocl_read_image ##suffix(                                   \
-                    EXPEND_READ_COORDF(surface_id, sampler, coord), 0);      \
-  }
+  coord.s2 = clamp(coord.s2, 0, array_size - 1);
+  return coord;
+}
 
-#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,         \
-                        image_type, type, suffix, coord_type, n)             \
-  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
-                                               const sampler_t sampler,      \
-                                               coord_type coord)             \
-  {                                                                          \
-    GET_IMAGE(cl_image, surface_id);                                         \
-    GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai)                         \
-    coord_type tmpCoord = coord;                                             \
-    if (float_coord_rounding_fix | int_clamping_fix) {                       \
-      if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)              \
-          && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {        \
-        if (float_coord_rounding_fix                                         \
-            && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) {              \
-          FIXUP_FLOAT_COORD(tmpCoord);                                       \
-        }                                                                    \
-        if (int_clamping_fix) {                                              \
-            coord_type intCoord;                                             \
-            if (sampler & CLK_NORMALIZED_COORDS_TRUE) {                      \
-              DENORMALIZE_COORD(surface_id, intCoord, tmpCoord);             \
-            } else                                                           \
-              intCoord = tmpCoord;                                           \
-            return   __gen_ocl_read_image ##suffix(                          \
-                       EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
-       }                                                                     \
-      }                                                                      \
-    }                                                                        \
-    return  __gen_ocl_read_image ##suffix(                                   \
-                        EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
-  }
+INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  int array_size = __gen_ocl_get_image_depth(surface_id);
+  coord.s2 = clamp(coord.s2, 0, array_size - 1);
+  return coord;
+}
+
+// For non-array image types, there is nothing to validate.
+#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
+INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \
+{ \
+  return coord; \
+}
+
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
+
+///////////////////////////////////////////////////////////////////////////////
+// Helper functions to work around some coordinate boundary issues.
+// The major issue on Gen7/Gen7.5 is that the sample message cannot sample
+// integer type surfaces correctly with CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
+// The workaround is to use an LD message instead of the normal sample message.
+///////////////////////////////////////////////////////////////////////////////
+bool __gen_sampler_need_fix(const sampler_t sampler)
+{
+  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
+          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
+}
+
+bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
+{
+  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
+}
+
+
+INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
+{
+  if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
+    tmpCoord += -0x1p-9f;
+  return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord)
+{
+  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+    tmpCoord.s0 += -0x1p-9f;
+  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+    tmpCoord.s1 += -0x1p-9f;
+  return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord)
+{
+  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+    tmpCoord.s0 += -0x1p-9f;
+  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+    tmpCoord.s1 += -0x1p-9f;
+  if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+    tmpCoord.s2 += -0x1p-9f;
+  return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
+{
+  if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+    tmpCoord.s0 += -0x1p-9f;
+  if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+    tmpCoord.s1 += -0x1p-9f;
+  if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+    tmpCoord.s2 += -0x1p-9f;
+  return tmpCoord;
+}
+
+// Functions to denormalize coordinates. This is needed when we have to use the
+// LD message (sampler offset is non-zero) and the coordinates are normalized
+// coordinates.
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  return srcCoord;
+}
+
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+  return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+  return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+  return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+  return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord)
+{
+  GET_IMAGE(image, surface_id);
+  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+  return srcCoord;
+}
+
+// After denormalizing, fix up the negative boundary: any negative value becomes -1.
+INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord)
+{
+  return coord < 0 ? -1 : coord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord)
+{
+  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+  return coord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord)
+{
+  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+  coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+  return coord;
+}
 
-#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n)   \
-  OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
-                                               coord_type coord)             \
-  {                                                                          \
-    GET_IMAGE(cl_image, surface_id);                                         \
-    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai)                           \
-    return __gen_ocl_read_image ##suffix(                                    \
-           EXPEND_READ_COORDF(surface_id,                                    \
-                             CLK_NORMALIZED_COORDS_FALSE                     \
-                             | CLK_ADDRESS_NONE                              \
-                             | CLK_FILTER_NEAREST, (float)coord), 0);        \
+INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
+{
+  coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+  coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+  coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+  return coord;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Read/Write Functions
+///////////////////////////////////////////////////////////////////////////////
+
+// 2D 3D Image Common Macro
+#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
+#define GEN_FIX_FLOAT_ROUNDING 1
+#define GEN_FIX_INT_CLAMPING 1
+#else
+#define GEN_FIX_FLOAT_ROUNDING 0
+#define GEN_FIX_INT_CLAMPING 0
+#endif
+
+// For integer coordinates
+#define DECL_READ_IMAGE0(int_clamping_fix, image_type,                        \
+                         image_data_type, suffix, coord_type)                 \
+  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
+                                        const sampler_t sampler,              \
+                                        coord_type coord)                     \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord = __gen_validate_array_index(coord, cl_image);                      \
+    if (int_clamping_fix && __gen_sampler_need_fix(sampler))                  \
+      return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1);    \
+    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);      \
   }
 
-#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
-  OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\
-  {\
-    GET_IMAGE(cl_image, surface_id);\
-    __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
+// For float coordinates
+#define DECL_READ_IMAGE1(int_clamping_fix, image_type,                        \
+                         image_data_type, suffix, coord_type)                 \
+  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
+                                        const sampler_t sampler,              \
+                                        coord_type coord)                     \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);        \
+    if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
+      if (__gen_sampler_need_fix(sampler)) {                                  \
+        if (GEN_FIX_FLOAT_ROUNDING &&                                         \
+            __gen_sampler_need_rounding_fix(sampler))                         \
+          tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
+        if (int_clamping_fix) {                                               \
+            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
+              tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
+            tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
+            return __gen_ocl_read_image ##suffix(                             \
+                     surface_id, sampler, tmpCoord, 1);                       \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0);  \
   }
 
-#define DECL_IMAGE_INFO_COMMON(image_type)    \
-  OVERLOADABLE  int get_image_channel_data_type(image_type image)\
-  { \
-    GET_IMAGE(image, surface_id);\
-    return __gen_ocl_get_image_channel_data_type(surface_id); \
-  }\
-  OVERLOADABLE  int get_image_channel_order(image_type image)\
-  { \
-    GET_IMAGE(image, surface_id);\
-    return __gen_ocl_get_image_channel_order(surface_id); \
-  } \
-  OVERLOADABLE int get_image_width(image_type image) \
-  { \
-    GET_IMAGE(image, surface_id); \
-    return __gen_ocl_get_image_width(surface_id);  \
+#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type,                \
+                                  suffix, coord_type)                         \
+  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
+                                               coord_type coord)              \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord = __gen_validate_array_index(coord, cl_image);                      \
+    return __gen_ocl_read_image ##suffix(                                     \
+             surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE       \
+             | CLK_FILTER_NEAREST, coord, 0);                                 \
   }
 
-// 1D
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix)                       \
-  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1)               \
-  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1)  \
-  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1)                        \
-  DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
-  DECL_WRITE_IMAGE(image_type, type, suffix, float)
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-#define FIXUP_FLOAT_COORD(tmpCoord)                            \
-  {                                                            \
-    if (tmpCoord < 0 && tmpCoord > -0x1p-20f)                  \
-      tmpCoord += -0x1p-9f;                                     \
+#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type)     \
+  OVERLOADABLE void write_image ##suffix(image_type cl_image,                 \
+                                         coord_type coord,                    \
+                                         image_data_type color)               \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord_type fixedCoord = __gen_validate_array_index(coord, cl_image);      \
+    __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color);            \
   }
 
-DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
-DECL_IMAGE(0, image1d_t, float4, f)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui)
-DECL_IMAGE(0, image1d_buffer_t, float4, f)
+#define int1 int
+#define float1 float
 
-// 1D Info
-DECL_IMAGE_INFO_COMMON(image1d_t)
-DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
 
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef DECL_IMAGE
-// End of 1D
-
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n)                       \
-  DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n)              \
-  DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
-  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n)                       \
-  DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
-  DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
-// 2D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
-                                               (int)(coord.s1 < 0 ? -1 : coord.s1), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
-                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord)                            \
-  {                                                            \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
-      tmpCoord.s0 += -0x1p-9f;                                  \
-    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)            \
-      tmpCoord.s1 += -0x1p-9f;                                 \
+#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n)  \
+  DECL_READ_IMAGE0(int_clamping_fix, image_type,                              \
+                   image_data_type, suffix, int ##n)                          \
+  DECL_READ_IMAGE1(int_clamping_fix, image_type,                              \
+                   image_data_type, suffix, float ##n)                        \
+  DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n)     \
+  DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n)             \
+
+// 1D
+#define DECL_IMAGE_TYPE(image_type, n)                                        \
+  DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n)                    \
+  DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n)                  \
+  DECL_IMAGE(0, image_type, float4, f, n)
+
+DECL_IMAGE_TYPE(image1d_t, 1)
+DECL_IMAGE_TYPE(image1d_buffer_t, 1)
+DECL_IMAGE_TYPE(image2d_t, 2)
+DECL_IMAGE_TYPE(image3d_t, 4)
+DECL_IMAGE_TYPE(image3d_t, 3)
+DECL_IMAGE_TYPE(image2d_array_t, 4)
+DECL_IMAGE_TYPE(image2d_array_t, 3)
+
+// For 1D Array:
+// The __gen_fixup_1darray_coord functions convert a 1D array coord to a 2D
+// array coord; the caller must set the sampler offset to 2 when using the
+// converted coord. This works around an image 1D array restriction: the ai
+// cannot be set in the LD message. We solve it by faking the same image as a 2D
+// array and accessing it by LD message as a 3D surface, treating ai as the w coordinate.
+INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord, image1d_array_t image)
+{
+  float4 newCoord;
+  newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+  newCoord.s1 = 0;
+  newCoord.s2 = coord.s1;
+  newCoord.s3 = 0;
+  return newCoord;
+}
+
+INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t image)
+{
+  int4 newCoord;
+  newCoord.s0 = coord.s0;
+  newCoord.s1 = 0;
+  newCoord.s2 = coord.s1;
+  newCoord.s3 = 0;
+  return newCoord;
+}
+
+// For integer coordinates
+#define DECL_READ_IMAGE0_1DArray(int_clamping_fix,                            \
+                                 image_data_type, suffix, coord_type)         \
+  OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image,  \
+                                        const sampler_t sampler,              \
+                                        coord_type coord)                     \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord = __gen_validate_array_index(coord, cl_image);                      \
+    if (int_clamping_fix && __gen_sampler_need_fix(sampler)) {                \
+      int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image);             \
+      return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \
+    }                                                                         \
+    return  __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);     \
   }
 
-DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
-DECL_IMAGE(0, image2d_t, float4, f, 2)
-
-// 1D Array
-#undef GET_IMAGE_ARRAY_SIZE
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
-  coord_type ai = __gen_compute_array_index(coord.s1, image);
-
-#define FIXUP_FLOAT_COORD(tmpCoord)                            \
-  {                                                            \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
-      tmpCoord.s0 += -0x1p-9f;                                  \
+// For float coordinates
+#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,           \
+                                 suffix, coord_type)                          \
+  OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image,  \
+                                        const sampler_t sampler,              \
+                                        coord_type coord)                     \
+  {                                                                           \
+    GET_IMAGE(cl_image, surface_id);                                          \
+    coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);        \
+    if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
+      if (__gen_sampler_need_fix(sampler)) {                                  \
+        if (GEN_FIX_FLOAT_ROUNDING &&                                         \
+            __gen_sampler_need_rounding_fix(sampler))                         \
+          tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
+        if (int_clamping_fix) {                                               \
+            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
+              tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
+            float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);  \
+            return __gen_ocl_read_image ##suffix(                             \
+                     surface_id, sampler, newCoord, 2);                       \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0);  \
   }
 
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
-DECL_IMAGE(0, image1d_array_t, float4, f, 2)
+#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)         \
+  DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, int2)   \
+  DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,                 \
+                           suffix, float2)                                    \
+  DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2)   \
+  DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2)            \
+
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i)
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui)
+DECL_IMAGE_1DArray(0, float4, f)
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Query Functions
+///////////////////////////////////////////////////////////////////////////////
+#define DECL_IMAGE_INFO_COMMON(image_type)                                    \
+  OVERLOADABLE  int get_image_channel_data_type(image_type image)             \
+  {                                                                           \
+    GET_IMAGE(image, surface_id);                                             \
+    return __gen_ocl_get_image_channel_data_type(surface_id);                 \
+  }                                                                           \
+  OVERLOADABLE  int get_image_channel_order(image_type image)                 \
+  {                                                                           \
+    GET_IMAGE(image, surface_id);                                             \
+    return __gen_ocl_get_image_channel_order(surface_id);                     \
+  }                                                                           \
+  OVERLOADABLE int get_image_width(image_type image)                          \
+  {                                                                           \
+    GET_IMAGE(image, surface_id);                                             \
+    return __gen_ocl_get_image_width(surface_id);                             \
+  }
 
-// 2D Info
+DECL_IMAGE_INFO_COMMON(image1d_t)
+DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
+DECL_IMAGE_INFO_COMMON(image1d_array_t)
 DECL_IMAGE_INFO_COMMON(image2d_t)
+DECL_IMAGE_INFO_COMMON(image3d_t)
+DECL_IMAGE_INFO_COMMON(image2d_array_t)
+
+// 2D extra Info
 OVERLOADABLE int get_image_height(image2d_t image)
 {
   GET_IMAGE(image, surface_id);
@@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t image)
 {
   return (int2){get_image_width(image), get_image_height(image)};
 }
+// End of 2D
 
-// 1D Array info
-DECL_IMAGE_INFO_COMMON(image1d_array_t)
-OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
-{
-  GET_IMAGE(image, surface_id);
-  return __gen_ocl_get_image_depth(surface_id);
-}
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDI
-#undef EXPEND_READ_COORDF
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 2D and 1D Array
-
-// 3D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
-                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
-                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
-                                                  dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord)                             \
-  {                                                             \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
-      tmpCoord.s0 += -0x1p-9f;                                   \
-    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
-      tmpCoord.s1 += -0x1p-9f;                                   \
-    if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)              \
-      tmpCoord.s2 += -0x1p-9f;                                   \
-  }
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
-DECL_IMAGE(0, image3d_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
-DECL_IMAGE(0, image3d_t, float4, f, 3)
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
-                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
-                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
-
-#define FIXUP_FLOAT_COORD(tmpCoord)                             \
-  {                                                             \
-    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)              \
-      tmpCoord.s0 += -0x1p-9f;                                   \
-    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)              \
-      tmpCoord.s1 += -0x1p-9f;                                   \
-  }
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
-  coord_type ai = __gen_compute_array_index(coord.s2, image);
-
-// 2D Array
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
-DECL_IMAGE(0, image2d_array_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3)
-DECL_IMAGE(0, image2d_array_t, float4, f, 3)
-
-// 3D Info
-DECL_IMAGE_INFO_COMMON(image3d_t)
+// 3D extra Info
 OVERLOADABLE int get_image_height(image3d_t image)
 {
   GET_IMAGE(image, surface_id);
@@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t image)
 }
 OVERLOADABLE int4 get_image_dim(image3d_t image)
 {
-  return (int4){get_image_width(image), get_image_height(image), get_image_depth(image), 0};
+  return (int4) (get_image_width(image),
+                 get_image_height(image),
+                 get_image_depth(image),
+                 0);
 }
 
-// 2D Array Info
-DECL_IMAGE_INFO_COMMON(image2d_array_t)
+// 2D Array extra Info
 OVERLOADABLE int get_image_height(image2d_array_t image)
 {
   GET_IMAGE(image, surface_id);
@@ -409,21 +615,10 @@ OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
   return __gen_ocl_get_image_depth(surface_id);
 }
 
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 3D and 2D Array
-
-#undef DECL_IMAGE
-#undef DECL_READ_IMAGE
-#undef DECL_READ_IMAGE_NOSAMPLER
-#undef DECL_WRITE_IMAGE
-#undef GEN_FIX_1
-// End of Image
-
-
-#undef GET_IMAGE
+// 1D Array info
+OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  return __gen_ocl_get_image_depth(surface_id);
+}
+// End of 1DArray
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index a438f09..afaa4a5 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -257,9 +257,10 @@ namespace gbe
   /*! Get number of element to process dealing either with a vector or a scalar
    *  value
    */
-  static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false)
+  static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t &elemNum, bool useUnsigned = false)
   {
     ir::Type type;
+    Type *llvmType = value->getType();
     if (llvmType->isVectorTy() == true) {
       VectorType *vectorType = cast<VectorType>(llvmType);
       Type *elementType = vectorType->getElementType();
@@ -629,6 +630,7 @@ namespace gbe
     void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
 
     uint8_t appendSampler(CallSite::arg_iterator AI);
+    uint8_t getImageID(CallInst &I);
 
     // These instructions are not supported at all
     void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
@@ -2526,8 +2528,8 @@ namespace gbe
         Value *srcValue = I.getOperand(0);
         Value *dstValue = &I;
         uint32_t srcElemNum = 0, dstElemNum = 0 ;
-        ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, srcElemNum);
-        ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, dstElemNum);
+        ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum);
+        ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum);
         // As long and double are not compatible in register storage
         // and we do not support double yet, simply put an assert here
         GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == ir::TYPE_DOUBLE));
@@ -2927,7 +2929,7 @@ namespace gbe
       {
         // dst is a 4 elements vector. We allocate all 4 registers here.
         uint32_t elemNum;
-        (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
+        (void)getVectorInfo(ctx, &I, elemNum);
         GBE_ASSERT(elemNum == 4);
         this->newRegister(&I);
         break;
@@ -3055,6 +3057,15 @@ namespace gbe
     return index;
   }
 
+  uint8_t GenWriter::getImageID(CallInst &I) {
+    PtrOrigMapIter iter = pointerOrigMap.find(&I);
+    GBE_ASSERT(iter != pointerOrigMap.end());
+    SmallVectorImpl<Value *> &origins = iter->second;
+    GBE_ASSERT(origins.size() == 1);
+    const ir::Register imageReg = this->getRegister(origins[0]);
+    return ctx.getFunction().getImageSet()->getIdx(imageReg);
+  }
+
   void GenWriter::emitCallInst(CallInst &I) {
     if (Function *F = I.getCalledFunction()) {
       if (F->getIntrinsicID() != 0) {
@@ -3218,7 +3229,6 @@ namespace gbe
           default: NOT_IMPLEMENTED;
         }
       } else {
-        int image_dim;
         // Get the name of the called function and handle it
         Value *Callee = I.getCalledValue();
         const std::string fnName = Callee->getName();
@@ -3334,13 +3344,13 @@ namespace gbe
           case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
           case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
           {
-            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
+            const uint8_t imageID = getImageID(I);
+            GBE_ASSERT(AI != AE); ++AI;
             const ir::Register reg = this->getRegister(&I, 0);
             int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
-            const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
-            ir::ImageInfoKey key(surfaceID, infoType);
+            ir::ImageInfoKey key(imageID, infoType);
             const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx);
-            ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
+            ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
             break;
           }
 
@@ -3350,69 +3360,75 @@ namespace gbe
           case GEN_OCL_READ_IMAGE_I_1D_I:
           case GEN_OCL_READ_IMAGE_UI_1D_I:
           case GEN_OCL_READ_IMAGE_F_1D_I:
-            image_dim = 1;
-            goto handle_read_image;
           case GEN_OCL_READ_IMAGE_I_2D:
           case GEN_OCL_READ_IMAGE_UI_2D:
           case GEN_OCL_READ_IMAGE_F_2D:
           case GEN_OCL_READ_IMAGE_I_2D_I:
           case GEN_OCL_READ_IMAGE_UI_2D_I:
           case GEN_OCL_READ_IMAGE_F_2D_I:
-            image_dim = 2;
-            goto handle_read_image;
           case GEN_OCL_READ_IMAGE_I_3D:
           case GEN_OCL_READ_IMAGE_UI_3D:
           case GEN_OCL_READ_IMAGE_F_3D:
           case GEN_OCL_READ_IMAGE_I_3D_I:
           case GEN_OCL_READ_IMAGE_UI_3D_I:
           case GEN_OCL_READ_IMAGE_F_3D_I:
-            image_dim = 3;
-handle_read_image:
           {
-            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
-            const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
+            const uint8_t imageID = getImageID(I);
+            GBE_ASSERT(AI != AE); ++AI;
             GBE_ASSERT(AI != AE);
             const uint8_t sampler = this->appendSampler(AI);
-            ++AI;
-
-            ir::Register ucoord;
-            ir::Register vcoord;
-            ir::Register wcoord;
-
-            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
-            if (image_dim > 1) {
-              GBE_ASSERT(AI != AE);
-              vcoord = this->getRegister(*AI);
-              ++AI;
-            } else {
-              vcoord = ir::ocl::invalid;
-            }
-
-            if (image_dim > 2) {
-              GBE_ASSERT(AI != AE);
-              wcoord = this->getRegister(*AI);
-              ++AI;
-            } else {
-              wcoord = ir::ocl::invalid;
-            }
+            ++AI; GBE_ASSERT(AI != AE);
+            uint32_t coordNum;
+            (void)getVectorInfo(ctx, *AI, coordNum);
+            if (coordNum == 4)
+              coordNum = 3;
+            const uint32_t imageDim = coordNum;
+            GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
 
-            vector<ir::Register> dstTupleData, srcTupleData;
-            const uint32_t elemNum = 4;
-            for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
-              const ir::Register reg = this->getRegister(&I, elemID);
-              dstTupleData.push_back(reg);
-            }
-            srcTupleData.push_back(ucoord);
-            srcTupleData.push_back(vcoord);
-            srcTupleData.push_back(wcoord);
             uint8_t samplerOffset = 0;
+            Value *coordVal = *AI;
+            ++AI; GBE_ASSERT(AI != AE);
+            Value *samplerOffsetVal = *AI;
 #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
-            GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
+            Constant *CPV = dyn_cast<Constant>(samplerOffsetVal);
             assert(CPV);
             const ir::Immediate &x = processConstantImm(CPV);
             GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
             samplerOffset = x.getIntegerValue();
 #endif
+            bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
+            bool requiredFloatCoord = samplerOffset == 0;
+
+            vector<ir::Register> dstTupleData, srcTupleData;
+            for (uint32_t elemID = 0; elemID < 3; elemID++) {
+              ir::Register reg;
+
+              if (elemID < imageDim)
+                reg = this->getRegister(coordVal, elemID);
+              else
+                reg = ir::ocl::invalid;
+
+              if (isFloatCoord == requiredFloatCoord)
+                srcTupleData.push_back(reg);
+              else if (!requiredFloatCoord) {
+                ir::Register intCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+                ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg);
+                srcTupleData.push_back(intCoordReg);
+              } else {
+                ir::Register floatCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+                ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg);
+                srcTupleData.push_back(floatCoordReg);
+              }
+            }
+
+            uint32_t elemNum;
+            (void)getVectorInfo(ctx, &I, elemNum);
+            GBE_ASSERT(elemNum == 4);
+
+            for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+              const ir::Register reg = this->getRegister(&I, elemID);
+              dstTupleData.push_back(reg);
+            }
             const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
             const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
 
@@ -3445,58 +3461,46 @@ handle_read_image:
                 GBE_ASSERT(0); // never been here.
             }
 
-            bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
-
-            ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
-                       isFloatCoord, sampler, samplerOffset);
+            ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
+                       requiredFloatCoord, sampler, samplerOffset);
             break;
           }
 
           case GEN_OCL_WRITE_IMAGE_I_1D:
           case GEN_OCL_WRITE_IMAGE_UI_1D:
           case GEN_OCL_WRITE_IMAGE_F_1D:
-            image_dim = 1;
-            goto handle_write_image;
           case GEN_OCL_WRITE_IMAGE_I_2D:
           case GEN_OCL_WRITE_IMAGE_UI_2D:
           case GEN_OCL_WRITE_IMAGE_F_2D:
-            image_dim = 2;
-            goto handle_write_image;
           case GEN_OCL_WRITE_IMAGE_I_3D:
           case GEN_OCL_WRITE_IMAGE_UI_3D:
           case GEN_OCL_WRITE_IMAGE_F_3D:
-            image_dim = 3;
-handle_write_image:
           {
-            GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
-            const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
-            ir::Register ucoord, vcoord, wcoord;
-
-            GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+            const uint8_t imageID = getImageID(I);
+            GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
+            uint32_t coordNum;
+            (void)getVectorInfo(ctx, *AI, coordNum);
+            if (coordNum == 4)
+              coordNum = 3;
+            const uint32_t imageDim = coordNum;
+            vector<ir::Register> srcTupleData;
+            GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
 
-            if (image_dim > 1) {
-              GBE_ASSERT(AI != AE);
-              vcoord = this->getRegister(*AI);
-              ++AI;
-            } else
-              vcoord = ir::ocl::invalid;
-
-            if (image_dim > 2) {
-              GBE_ASSERT(AI != AE);
-              wcoord = this->getRegister(*AI);
-              ++AI;
-            } else {
-              wcoord = ir::ocl::invalid;
-            }
+            for (uint32_t elemID = 0; elemID < 3; elemID++) {
+              ir::Register reg;
 
-            GBE_ASSERT(AI != AE);
-            vector<ir::Register> srcTupleData;
+              if (elemID < imageDim)
+                reg = this->getRegister(*AI, elemID);
+              else
+                reg = ir::ocl::invalid;
 
-            srcTupleData.push_back(ucoord);
-            srcTupleData.push_back(vcoord);
-            srcTupleData.push_back(wcoord);
+              srcTupleData.push_back(reg);
+            }
+            ++AI; GBE_ASSERT(AI != AE);
+            uint32_t elemNum;
+            (void)getVectorInfo(ctx, *AI, elemNum);
+            GBE_ASSERT(elemNum == 4);
 
-            const uint32_t elemNum = 4;
             for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
               const ir::Register reg = this->getRegister(*AI, elemID);
               srcTupleData.push_back(reg);
@@ -3523,7 +3527,7 @@ handle_write_image:
                 GBE_ASSERT(0); // never been here.
             }
 
-            ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
+            ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
             break;
           }
           case GEN_OCL_MUL_HI_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7434c78..8d55c3f 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj)
 // work around read image with the LD message. The coords are integer type.
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
 DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij)
 
 // To write_image functions.
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
 DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
 
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
 
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
 
 // To get image info function
 DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 5450a2b..baf526b 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -648,7 +648,7 @@ namespace gbe {
 
         // Get the function arguments
         CallSite CS(call);
-        CallSite::arg_iterator CI = CS.arg_begin() + 2;
+        CallSite::arg_iterator CI = CS.arg_begin() + 1;
 
         switch (it->second) {
           default: break;
@@ -661,8 +661,7 @@ namespace gbe {
           case GEN_OCL_READ_IMAGE_I_3D:
           case GEN_OCL_READ_IMAGE_UI_3D:
           case GEN_OCL_READ_IMAGE_F_3D:
-
-	  case GEN_OCL_READ_IMAGE_I_1D_I:
+          case GEN_OCL_READ_IMAGE_I_1D_I:
           case GEN_OCL_READ_IMAGE_UI_1D_I:
           case GEN_OCL_READ_IMAGE_F_1D_I:
           case GEN_OCL_READ_IMAGE_I_2D_I:
@@ -674,6 +673,9 @@ namespace gbe {
           case GEN_OCL_GET_IMAGE_WIDTH:
           case GEN_OCL_GET_IMAGE_HEIGHT:
           {
+            ++CI;
+            if ((*CI)->getType()->isVectorTy()) 
+              *CI = InsertToVector(call, *CI);
             setAppendPoint(call);
             extractFromVector(call);
             break;
@@ -681,15 +683,16 @@ namespace gbe {
           case GEN_OCL_WRITE_IMAGE_I_3D:
           case GEN_OCL_WRITE_IMAGE_UI_3D:
           case GEN_OCL_WRITE_IMAGE_F_3D:
-            CI++;
           case GEN_OCL_WRITE_IMAGE_I_2D:
           case GEN_OCL_WRITE_IMAGE_UI_2D:
           case GEN_OCL_WRITE_IMAGE_F_2D:
-            CI++;
           case GEN_OCL_WRITE_IMAGE_I_1D:
           case GEN_OCL_WRITE_IMAGE_UI_1D:
           case GEN_OCL_WRITE_IMAGE_F_1D:
           {
+            if ((*CI)->getType()->isVectorTy()) 
+              *CI = InsertToVector(call, *CI);
+            ++CI;
             *CI = InsertToVector(call, *CI);
             break;
           }
-- 
1.8.3.2



More information about the Beignet mailing list