[Beignet] [PATCH v3 2/3] GBE/runtime: fixup broken 1d array image support.

Zhigang Gong zhigang.gong at intel.com
Thu Jun 19 00:44:43 PDT 2014


As the sample LD message doesn't support an array index, we have
to create a 2D array surface backed by the same buffer object.
Thus one 1D array image will have two surfaces bound to it:
the first at the image's index and the second at 128 + index.

Then, on the kernel side, we access the corresponding
2D array surface when the LD message is required; otherwise
we access the original 1D array surface.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   9 +-
 backend/src/ir/instruction.cpp             |   2 +-
 backend/src/ocl_stdlib.tmpl.h              | 161 +++++++++++++++++++----------
 src/cl_api.c                               |   5 +-
 src/cl_command_queue.c                     |   5 +
 src/cl_device_id.c                         |   1 +
 src/cl_device_id.h                         |   1 +
 src/cl_gt_device.h                         |   1 +
 src/cl_mem.c                               |  29 +++---
 src/intel/intel_gpgpu.c                    |  12 ++-
 10 files changed, 153 insertions(+), 73 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index ecb64cd..986aa3e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3606,10 +3606,15 @@ namespace gbe
           msgPayloads[valueID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
         msgLen = srcNum;
       }
-      uint32_t bti = insn.getImageIndex();
+      // We switch to a fixup bti for linear filter on a image1d array sampling.
+      uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
+      if (bti > 253) {
+        std::cerr << "Too large bti " << bti;
+        return false;
+      }
       uint32_t sampler = insn.getSamplerIndex();
 
-      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset());
+      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
       return true;
     }
     DECL_CTOR(SampleInstruction, 1, 1);
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index d081235..435869e 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -527,7 +527,7 @@ namespace ir {
       uint8_t srcIsFloat:1;
       uint8_t dstIsFloat:1;
       uint8_t samplerIdx:4;
-      uint8_t samplerOffset:1;
+      uint8_t samplerOffset:2;
       uint8_t imageIdx;
       static const uint32_t srcNum = 3;
       static const uint32_t dstNum = 4;
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 605d96d..c43172d 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4566,24 +4566,18 @@ OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, in
 
 // 2D & 1D Array read
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, int i, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
 
 // 3D & 2D Array read
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
 OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
 OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, int i, uint sampler_offset);
 OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
 
 // 1D write
@@ -4606,6 +4600,9 @@ int __gen_ocl_get_image_height(uint surface_id);
 int __gen_ocl_get_image_channel_data_type(uint surface_id);
 int __gen_ocl_get_image_channel_order(uint surface_id);
 int __gen_ocl_get_image_depth(uint surface_id);
+/* The printf function. */
+int __gen_ocl_printf_stub(const char * format, ...);
+#define printf __gen_ocl_printf_stub
 
 // 2D 3D Image Common Macro
 #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
@@ -4616,21 +4613,49 @@ int __gen_ocl_get_image_depth(uint surface_id);
 
 #define GET_IMAGE(cl_image, surface_id) \
     uint surface_id = (uint)cl_image
+INLINE_OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  float array_size = __gen_ocl_get_image_depth(surface_id);
+  return clamp(rint(index), 0.f, array_size - 1.f);
+}
+
+INLINE_OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  float array_size = __gen_ocl_get_image_depth(surface_id);
+  return clamp(rint(index), 0.f, array_size - 1.f);
+}
+
+INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  int array_size = __gen_ocl_get_image_depth(surface_id);
+  return clamp(index, 0, array_size - 1);
+}
 
-#define DECL_READ_IMAGE0(int_clamping_fix,          \
+INLINE_OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
+{
+  GET_IMAGE(image, surface_id);
+  int array_size = __gen_ocl_get_image_depth(surface_id);
+  return clamp(index, 0, array_size - 1);
+}
+
+#define DECL_READ_IMAGE0(int_clamping_fix,                                   \
                         image_type, type, suffix, coord_type, n)             \
   INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
                                                const sampler_t sampler,      \
                                                coord_type coord)             \
   {                                                                          \
     GET_IMAGE(cl_image, surface_id);                                         \
+    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai);                          \
     if (int_clamping_fix &&                                                  \
         ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&             \
         ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST))               \
             return   __gen_ocl_read_image ##suffix(                          \
-                        EXPEND_READ_COORD(surface_id, sampler, coord), 1);   \
+                        EXPEND_READ_COORD(surface_id, sampler, coord));      \
     return  __gen_ocl_read_image ##suffix(                                   \
-                    EXPEND_READ_COORD(surface_id, sampler, (float)coord), 0);\
+                    EXPEND_READ_COORDF(surface_id, sampler, coord), 0);      \
   }
 
 #define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,         \
@@ -4640,6 +4665,7 @@ int __gen_ocl_get_image_depth(uint surface_id);
                                                coord_type coord)             \
   {                                                                          \
     GET_IMAGE(cl_image, surface_id);                                         \
+    GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai)                         \
     coord_type tmpCoord = coord;                                             \
     if (float_coord_rounding_fix | int_clamping_fix) {                       \
       if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)              \
@@ -4655,12 +4681,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
             } else                                                           \
               intCoord = tmpCoord;                                           \
             return   __gen_ocl_read_image ##suffix(                          \
-                       EXPEND_READ_COORD1(surface_id, sampler, intCoord), 1);\
+                       EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
        }                                                                     \
       }                                                                      \
     }                                                                        \
     return  __gen_ocl_read_image ##suffix(                                   \
-                        EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
+                        EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
   }
 
 #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n)   \
@@ -4668,11 +4694,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
                                                coord_type coord)             \
   {                                                                          \
     GET_IMAGE(cl_image, surface_id);                                         \
+    GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai)                           \
     return __gen_ocl_read_image ##suffix(                                    \
-           EXPEND_READ_COORD(surface_id,                                     \
+           EXPEND_READ_COORDF(surface_id,                                    \
                              CLK_NORMALIZED_COORDS_FALSE                     \
                              | CLK_ADDRESS_NONE                              \
-                             | CLK_FILTER_NEAREST, (float)coord), 0);               \
+                             | CLK_FILTER_NEAREST, (float)coord), 0);        \
   }
 
 #define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
@@ -4707,16 +4734,12 @@ int __gen_ocl_get_image_depth(uint surface_id);
   DECL_WRITE_IMAGE(image_type, type, suffix, int)                                    \
   DECL_WRITE_IMAGE(image_type, type, suffix, float)
 
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
 #define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
-
-#define OUT_OF_BOX(coord, surface, normalized)                   \
-  (coord < 0 ||                                                  \
-   ((normalized == 0)                                            \
-     && (coord >= __gen_ocl_get_image_width(surface)))           \
-   || ((normalized != 0) && (coord > 0x1p0)))
+#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
 
 #define FIXUP_FLOAT_COORD(tmpCoord)                            \
   {                                                            \
@@ -4732,10 +4755,10 @@ DECL_IMAGE(0, image1d_t, float4, f)
 DECL_IMAGE_INFO_COMMON(image1d_t)
 
 #undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
 #undef DENORMALIZE_COORD
 #undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
 #undef FIXUP_FLOAT_COORD
 #undef DECL_IMAGE
 // End of 1D
@@ -4747,20 +4770,14 @@ DECL_IMAGE_INFO_COMMON(image1d_t)
   DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)                                  \
   DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
 // 2D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
-                                               (int)(coord.s1 < 0 ? -1 : coord.s1)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
+                                               (int)(coord.s1 < 0 ? -1 : coord.s1), 1
 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
                                                   dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
 #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
 
-#define OUT_OF_BOX(coord, surface, normalized)                   \
-  (coord.s0 < 0 || coord.s1 < 0 ||                               \
-   ((normalized == 0)                                            \
-     && (coord.s0 >= __gen_ocl_get_image_width(surface)          \
-         || coord.s1 >= __gen_ocl_get_image_height(surface)))    \
-   || ((normalized != 0) && (coord.s0 > 0x1p0 || coord.s1 > 0x1p0)))
-
 #define FIXUP_FLOAT_COORD(tmpCoord)                            \
   {                                                            \
     if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
@@ -4774,6 +4791,28 @@ DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
 DECL_IMAGE(0, image2d_t, float4, f, 2)
 
 // 1D Array
+#undef GET_IMAGE_ARRAY_SIZE
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef FIXUP_FLOAT_COORD
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
+#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
+  coord_type ai = __gen_compute_array_index(coord.s1, image);
+
+#define FIXUP_FLOAT_COORD(tmpCoord)                            \
+  {                                                            \
+    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)            \
+      tmpCoord.s0 += -0x1p-9;                                  \
+  }
+
 DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
 DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
 DECL_IMAGE(0, image1d_array_t, float4, f, 2)
@@ -4799,29 +4838,23 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
 }
 
 #undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDI
+#undef EXPEND_READ_COORDF
 #undef DENORMALIZE_COORD
 #undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
 #undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
 // End of 2D and 1D Array
 
 // 3D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
-#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
-                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
+                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
 #define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
                                                   dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
                                                   dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
 #define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
-#define OUT_OF_BOX(coord, surface, normalized)                  \
-  (coord.s0 < 0 || coord.s1 < 0 || coord.s2 < 0 ||              \
-   ((normalized == 0)                                           \
-     && (coord.s0 >= __gen_ocl_get_image_width(surface)         \
-         || coord.s1 >= __gen_ocl_get_image_height(surface)     \
-         || coord.s2 >= __gen_ocl_get_image_depth(surface)))    \
-   || ((normalized != 0)                                        \
-        &&(coord.s0 > 1 || coord.s1 > 1 || coord.s2 > 1)))
 
 #define FIXUP_FLOAT_COORD(tmpCoord)                             \
   {                                                             \
@@ -4832,6 +4865,7 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
     if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20)              \
       tmpCoord.s2 += -0x1p-9;                                   \
   }
+#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
 
 DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
 DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
@@ -4841,6 +4875,32 @@ DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
 DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
 DECL_IMAGE(0, image3d_t, float4, f, 3)
 
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
+#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
+#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
+                                               (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
+                                                  dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
+
+#define FIXUP_FLOAT_COORD(tmpCoord)                             \
+  {                                                             \
+    if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20)              \
+      tmpCoord.s0 += -0x1p-9;                                   \
+    if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20)              \
+      tmpCoord.s1 += -0x1p-9;                                   \
+  }
+#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
+  coord_type ai = __gen_compute_array_index(coord.s2, image);
+
 // 2D Array
 DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
 DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
@@ -4885,11 +4945,12 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
 }
 
 #undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORD1
+#undef EXPEND_READ_COORDF
+#undef EXPEND_READ_COORDI
 #undef DENORMALIZE_COORD
 #undef EXPEND_WRITE_COORD
-#undef OUT_OF_BOX
 #undef FIXUP_FLOAT_COORD
+#undef GET_IMAGE_ARRAY_SIZE
 // End of 3D and 2D Array
 
 #undef DECL_IMAGE
@@ -5066,8 +5127,4 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
 #undef OVERLOADABLE
 #undef INLINE
 
-/* The printf function. */
-int __gen_ocl_printf_stub(const char * format, ...);
-#define printf __gen_ocl_printf_stub
-
 #endif /* __GEN_OCL_STDLIB_H__ */
diff --git a/src/cl_api.c b/src/cl_api.c
index b17cc52..9e412f6 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -674,7 +674,10 @@ clGetSupportedImageFormats(cl_context         ctx,
     err = CL_INVALID_VALUE;
     goto error;
   }
-  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
+               image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
+               image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
+               image_type != CL_MEM_OBJECT_IMAGE2D &&
                image_type != CL_MEM_OBJECT_IMAGE3D)) {
     err = CL_INVALID_VALUE;
     goto error;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 1bc97ac..41281f2 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -137,6 +137,11 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
                         image->intel_fmt, image->image_type,
                         image->w, image->h, image->depth,
                         image->row_pitch, image->tiling);
+    if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
+                          image->intel_fmt, image->image_type,
+                          image->w, image->h, image->depth,
+                          image->row_pitch, image->tiling);
   }
   return CL_SUCCESS;
 }
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index af8e90c..578b548 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -363,6 +363,7 @@ cl_get_device_info(cl_device_id     device,
     DECL_FIELD(IMAGE_SUPPORT, image_support)
     DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args)
     DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args)
+    DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size)
     DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width)
     DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height)
     DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width)
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index a5449a7..769bfd2 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -51,6 +51,7 @@ struct _cl_device_id {
   cl_uint  max_read_image_args;
   cl_uint  max_write_image_args;
   size_t   image2d_max_width;
+  size_t   image_max_array_size;
   size_t   image2d_max_height;
   size_t   image3d_max_width;
   size_t   image3d_max_height;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index b8bda5e..6d03123 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -41,6 +41,7 @@
 .image_support = CL_TRUE,
 .max_read_image_args = 128,
 .max_write_image_args = 8,
+.image_max_array_size = 2048,
 .image2d_max_width = 8192,
 .image2d_max_height = 8192,
 .image3d_max_width = 8192,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 491993e..a7a0f59 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -540,7 +540,7 @@ static cl_mem
 _cl_mem_new_image(cl_context ctx,
                   cl_mem_flags flags,
                   const cl_image_format *fmt,
-                  const cl_mem_object_type image_type,
+                  const cl_mem_object_type orig_image_type,
                   size_t w,
                   size_t h,
                   size_t depth,
@@ -551,6 +551,7 @@ _cl_mem_new_image(cl_context ctx,
 {
   cl_int err = CL_SUCCESS;
   cl_mem mem = NULL;
+  cl_mem_object_type image_type = orig_image_type;
   uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
   size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
   cl_image_tiling_t tiling = CL_NO_TILE;
@@ -584,8 +585,7 @@ _cl_mem_new_image(cl_context ctx,
       image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY)))
     DO_IMAGE_ERROR;
 
-  if (image_type == CL_MEM_OBJECT_IMAGE1D ||
-      image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
     size_t min_pitch = bpp * w;
     if (data && pitch == 0)
       pitch = min_pitch;
@@ -596,7 +596,7 @@ _cl_mem_new_image(cl_context ctx,
     else if (data && slice_pitch == 0)
       slice_pitch = pitch;
     if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
-    if (UNLIKELY(depth > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+    if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
@@ -617,7 +617,14 @@ _cl_mem_new_image(cl_context ctx,
 
     depth = 1;
   } else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
+             image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
              image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+      h = 1;
+      tiling = CL_NO_TILE;
+    } else if (cl_driver_get_ver(ctx->drv) != 6)
+      tiling = cl_get_default_tiling();
+
     size_t min_pitch = bpp * w;
     if (data && pitch == 0)
       pitch = min_pitch;
@@ -626,15 +633,14 @@ _cl_mem_new_image(cl_context ctx,
       slice_pitch = min_slice_pitch;
     if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR;
     if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR;
-    if (UNLIKELY(depth > ctx->device->image3d_max_depth)) DO_IMAGE_ERROR;
+    if (image_type == CL_MEM_OBJECT_IMAGE3D &&
+       (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR
+    else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
 
-    /* Pick up tiling mode (we do only linear on SNB) */
-    if (cl_driver_get_ver(ctx->drv) != 6)
-      tiling = cl_get_default_tiling();
   } else
     assert(0);
 
@@ -643,12 +649,7 @@ _cl_mem_new_image(cl_context ctx,
   /* Tiling requires to align both pitch and height */
   if (tiling == CL_NO_TILE) {
     aligned_pitch = w * bpp;
-    if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
-        image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
-        image_type == CL_MEM_OBJECT_IMAGE3D)
-      aligned_h = ALIGN(h, valign);
-    else
-      aligned_h     = h;
+    aligned_h  = ALIGN(h, valign);
   } else if (tiling == CL_TILE_X) {
     aligned_pitch = ALIGN(w * bpp, tilex_w);
     aligned_h     = ALIGN(h, tilex_h);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 197d388..c98a440 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -91,7 +91,7 @@ struct intel_gpgpu
 
   unsigned long img_bitmap;              /* image usage bitmap. */
   unsigned int img_index_base;          /* base index for image surface.*/
-  drm_intel_bo *binded_img[max_img_n];  /* all images binded for the call */
+  drm_intel_bo *binded_img[max_img_n + 128];  /* all images binded for the call */
 
   unsigned long sampler_bitmap;          /* sampler usage bitmap. */
 
@@ -764,7 +764,10 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
   memset(ss, 0, sizeof(*ss));
 
   ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
-  ss->ss0.surface_type = intel_get_surface_type(type);
+  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+    ss->ss0.surface_type = I965_SURFACE_2D;
+  else
+    ss->ss0.surface_type = intel_get_surface_type(type);
   if (intel_is_surface_array(type)) {
     ss->ss0.surface_array = 1;
     ss->ss0.surface_array_spacing = 1;
@@ -811,7 +814,10 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
   memset(ss, 0, sizeof(*ss));
 
   ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
-  ss->ss0.surface_type = intel_get_surface_type(type);
+  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+    ss->ss0.surface_type = I965_SURFACE_2D;
+  else
+    ss->ss0.surface_type = intel_get_surface_type(type);
   if (intel_is_surface_array(type)) {
     ss->ss0.surface_array = 1;
     ss->ss0.surface_array_spacing = 1;
-- 
1.8.3.2



More information about the Beignet mailing list