[Beignet] [PATCH] GBE: switch to use CLANG native image types.

Yang, Rong R rong.r.yang at intel.com
Wed Dec 17 23:41:55 PST 2014


One minor comment.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Zhigang Gong
> Sent: Monday, December 15, 2014 09:05
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH] GBE: switch to use CLANG native image types.
> 
> CLANG has had all native image types since 3.3, so there is no
> need to keep the original hacky implementation any longer.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/ir/function.hpp                |  24 +++
>  backend/src/ir/instruction.cpp             |   1 -
>  backend/src/ir/instruction.hpp             |   1 -
>  backend/src/libocl/include/ocl_types.h     |  21 +--
>  backend/src/libocl/src/ocl_image.cl        | 275 +++++++++--------------------
>  backend/src/llvm/llvm_gen_backend.cpp      | 170 ++++--------------
>  backend/src/llvm/llvm_gen_backend.hpp      |  24 ++-
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  36 +---
>  backend/src/llvm/llvm_scalarize.cpp        |  47 +----
>  9 files changed, 175 insertions(+), 424 deletions(-)
> 
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
> index 1163a19..0f86fef 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -180,6 +180,30 @@ namespace ir {
>        std::string accessQual;
>        std::string typeQual;
>        std::string argName; // My different from arg->getName()
> +
> +      bool isImage1dT() const {
> +        return typeName.compare("image1d_t") == 0;
> +      }
> +      bool isImage1dArrayT() const {
> +        return typeName.compare("image1d_array_t") == 0;
> +      }
> +      bool isImage1dBufferT() const {
> +        return typeName.compare("image1d_buffer_t") == 0;
> +      }
> +      bool isImage2dT() const {
> +        return typeName.compare("image2d_t") == 0;
> +      }
> +      bool isImage2dArrayT() const {
> +        return typeName.compare("image2d_array_t") == 0;
> +      }
> +      bool isImage3dT() const {
> +        return typeName.compare("image3d_t") == 0;
> +      }
> +
> +      bool isImageType() const {
> +        return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
> +               isImage2dT() || isImage2dArrayT() || isImage3dT();
> +      }
>      };
> 
>      /*! Create a function input argument */
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 2bd0061..82e7dda 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -1255,7 +1255,6 @@ namespace ir {
>        case MEM_LOCAL: return out << "local";
>        case MEM_CONSTANT: return out << "constant";
>        case MEM_PRIVATE: return out << "private";
> -      case IMAGE: return out << "image";
>        case MEM_INVALID: return out << "invalid";
>      };
>      return out;
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 11e9509..47312f5 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -58,7 +58,6 @@ namespace ir {
>      MEM_LOCAL,      //!< Local memory (thread group memory)
>      MEM_CONSTANT,   //!< Immutable global memory
>      MEM_PRIVATE,    //!< Per thread private memory
> -    IMAGE,          //!< For texture image.
>      MEM_INVALID
>    };
> 
> diff --git a/backend/src/libocl/include/ocl_types.h
> b/backend/src/libocl/include/ocl_types.h
> index 63478c9..49ac907 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -87,27 +87,8 @@ DEF(double);
>  // FIXME:
>  // This is a transitional hack to bypass the LLVM 3.3 built-in types.
>  // See the Khronos SPIR specification for handling of these types.
> -#define __texture __attribute__((address_space(4)))
> -struct _image1d_t;
> -typedef __texture struct _image1d_t* __image1d_t;
> -struct _image1d_buffer_t;
> -typedef __texture struct _image1d_buffer_t* __image1d_buffer_t;
> -struct _image1d_array_t;
> -typedef __texture struct _image1d_array_t* __image1d_array_t;
> -struct _image2d_t;
> -typedef __texture struct _image2d_t* __image2d_t;
> -struct _image2d_array_t;
> -typedef __texture struct _image2d_array_t* __image2d_array_t;
> -struct _image3d_t;
> -typedef __texture struct _image3d_t* __image3d_t;
> -typedef const ushort __sampler_t;
> -#define image1d_t __image1d_t
> -#define image1d_buffer_t __image1d_buffer_t
> -#define image1d_array_t __image1d_array_t
> -#define image2d_t __image2d_t
> -#define image2d_array_t __image2d_array_t
> -#define image3d_t __image3d_t
>  #define sampler_t __sampler_t
> +typedef const ushort __sampler_t;
> 
>  /////////////////////////////////////////////////////////////////////////////
>  // OpenCL built-in event types
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index 95b98ff..c4ca2f8 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -20,176 +20,94 @@
>  #include "ocl_integer.h"
>  #include "ocl_common.h"
> 
> +#define int1 int
> +#define float1 float
> +
> 
> //////////////////////////////////////////////////////////////////////////////
> /
>  // Beignet builtin functions.
> 
> //////////////////////////////////////////////////////////////////////////////
> /
> 
> -// 1D read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        float u, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        int u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          float u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          int u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          float u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          int u, uint sampler_offset);
> -
> -// 2D & 1D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        float2 coord, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        int2 coord, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          float2 coord, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          int2 coord, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          float2 coord, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          int2 coord, uint sampler_offset);
> -
> -// 3D & 2D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        float4 coord, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> -                                        int4 coord, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          float4 coord, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> -                                          int4 coord, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          float4 coord, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> -                                          int4 coord, uint sampler_offset);
> -
> -// Don't know why we need to support 3 component coordinates, but it's in
> the old
> -// version, let's keep to support it.
> -INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> -                                               float3 coord, uint sampler_offset)
> -{
> -   return __gen_ocl_read_imagei(surface_id, sampler,
> -            (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> -                                               int3 coord, uint sampler_offset)
> -{
> -  return __gen_ocl_read_imagei(surface_id, sampler,
> -           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> -                                                 float3 coord, uint sampler_offset)
> -{
> -  return __gen_ocl_read_imageui(surface_id, sampler,
> -           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> -                                                 int3 coord, uint sampler_offset)
> -{
> -  return __gen_ocl_read_imageui(surface_id, sampler,
> -           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> -                             float3 coord, uint sampler_offset)
> -{
> -  return __gen_ocl_read_imagef(surface_id, sampler,
> -           (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> -                                                 int3 coord, uint sampler_offset)
> -{
> -  return __gen_ocl_read_imagef(surface_id, sampler,
> -           (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> -}
> -
> -// 1D write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4
> color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4
> color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4
> color);
> -
> -// 2D & 1D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord,
> int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord,
> uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord,
> float4 color);
> -
> -// 3D & 2D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord,
> int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord,
> uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord,
> float4 color);
> -
> -INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3
> coord, int4 color)
> -{
> -  __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0),
> color);
> -}
> -INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3
> coord, uint4 color)
> -{
> -  __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2,
> 0), color);
> -}
> -INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3
> coord, float4 color)
> -{
> -  __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0),
> color);
> -}
> -
> -int __gen_ocl_get_image_width(uint surface_id);
> -int __gen_ocl_get_image_height(uint surface_id);
> -int __gen_ocl_get_image_channel_data_type(uint surface_id);
> -int __gen_ocl_get_image_channel_order(uint surface_id);
> -int __gen_ocl_get_image_depth(uint surface_id);
> -
> -
> -#define GET_IMAGE(cl_image, surface_id) \
> -    uint surface_id = (uint)cl_image
> -
> +#define DECL_GEN_OCL_RW_IMAGE(image_type, n) \
> +  OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image,
> sampler_t sampler,            \
> +                                          float ##n coord, uint sampler_offset);          \
> +  OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image,
> sampler_t sampler,            \
> +                                          int ##n coord, uint sampler_offset);            \
> +  OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image,
> sampler_t sampler,          \
> +                                            float ##n coord, uint sampler_offset);        \
> +  OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image,
> sampler_t sampler,          \
> +                                            int ##n coord, uint sampler_offset);          \
> +  OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image,
> sampler_t sampler,          \
> +                                            float ##n coord, uint sampler_offset);        \
> +  OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image,
> sampler_t sampler,          \
> +                                            int ##n coord, uint sampler_offset);          \
> +  OVERLOADABLE void __gen_ocl_write_imagei(image_type image, int ##n
> coord , int4 color); \
> +  OVERLOADABLE void __gen_ocl_write_imageui(image_type image, int
> ##n coord, uint4 color);\
> +  OVERLOADABLE void __gen_ocl_write_imagef(image_type image, int ##n
> coord, float4 color);
> +
> +#define DECL_GEN_OCL_QUERY_IMAGE(image_type) \
> +  OVERLOADABLE int __gen_ocl_get_image_width(image_type image);
> \
> +  OVERLOADABLE int __gen_ocl_get_image_height(image_type image);
> \
> +  OVERLOADABLE int
> __gen_ocl_get_image_channel_data_type(image_type image);               \
> +  OVERLOADABLE int __gen_ocl_get_image_channel_order(image_type
> image);                   \
> +  OVERLOADABLE int __gen_ocl_get_image_depth(image_type image);
> \
> +
> +DECL_GEN_OCL_RW_IMAGE(image1d_t, 1)
> +DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1)
> +DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2)
> +DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4)
> +DECL_GEN_OCL_RW_IMAGE(image2d_t, 2)
> +DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 3)
> +DECL_GEN_OCL_RW_IMAGE(image3d_t, 3)
> +DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 4)
> +DECL_GEN_OCL_RW_IMAGE(image3d_t, 4)
> +
> +DECL_GEN_OCL_QUERY_IMAGE(image1d_t)
> +DECL_GEN_OCL_QUERY_IMAGE(image1d_buffer_t)
> +DECL_GEN_OCL_QUERY_IMAGE(image1d_array_t)
> +DECL_GEN_OCL_QUERY_IMAGE(image2d_t)
> +DECL_GEN_OCL_QUERY_IMAGE(image2d_array_t)
> +DECL_GEN_OCL_QUERY_IMAGE(image3d_t)
> 
> //////////////////////////////////////////////////////////////////////////////
> /
>  // helper functions to validate array index.
> 
> //////////////////////////////////////////////////////////////////////////////
> /
>  INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord,
> image1d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  float array_size = __gen_ocl_get_image_depth(image);
>    coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
>    return coord;
>  }
> 
>  INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord,
> image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  float array_size = __gen_ocl_get_image_depth(image);
>    coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
>    return coord;
>  }
> 
>  INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord,
> image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  float array_size = __gen_ocl_get_image_depth(surface_id);
> +  float array_size = __gen_ocl_get_image_depth(image);
>    coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
>    return coord;
>  }
> 
>  INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord,
> image1d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  int array_size = __gen_ocl_get_image_depth(image);
>    coord.s1 = clamp(coord.s1, 0, array_size - 1);
>    return coord;
>  }
> 
>  INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord,
> image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  int array_size = __gen_ocl_get_image_depth(image);
>    coord.s2 = clamp(coord.s2, 0, array_size - 1);
>    return coord;
>  }
> 
>  INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord,
> image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  int array_size = __gen_ocl_get_image_depth(surface_id);
> +  int array_size = __gen_ocl_get_image_depth(image);
>    coord.s2 = clamp(coord.s2, 0, array_size - 1);
>    return coord;
>  }
> @@ -273,62 +191,54 @@ INLINE_OVERLOADABLE float4
> __gen_fixup_float_coord(float4 tmpCoord)
>  // coordiates.
>  INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t
> image, float srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return srcCoord * __gen_ocl_get_image_width(surface_id);
> +  return srcCoord * __gen_ocl_get_image_width(image);
>  }
> 
>  INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const
> image1d_array_t image, float2 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
>    return srcCoord;
>  }
> 
>  INLINE_OVERLOADABLE float __gen_denormalize_coord(const
> image1d_buffer_t image, float srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return srcCoord * __gen_ocl_get_image_width(surface_id);
> +  return srcCoord * __gen_ocl_get_image_width(image);
>  }
> 
>  INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t
> image, float2 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> -  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
>    return srcCoord;
>  }
> 
>  INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const
> image2d_array_t image, float3 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> -  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
>    return srcCoord;
>  }
> 
>  INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t
> image, float3 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> -  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> -  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
> +  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image);
>    return srcCoord;
>  }
> 
>  INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const
> image2d_array_t image, float4 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> -  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
>    return srcCoord;
>  }
> 
>  INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t
> image, float4 srcCoord)
>  {
> -  GET_IMAGE(image, surface_id);
> -  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> -  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> -  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> +  srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
> +  srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
> +  srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image);
>    return srcCoord;
>  }
> 
> @@ -381,11 +291,10 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>                                          const sampler_t sampler,              \
>                                          coord_type coord)                     \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord = __gen_validate_array_index(coord, cl_image);                      \
>      if (int_clamping_fix && __gen_sampler_need_fix(sampler))                  \
> -      return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1);
> \
> -    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
> +      return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 1);      \
> +    return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0);        \
>    }
> 
>  // For float coordinates
> @@ -395,7 +304,6 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>                                          const sampler_t sampler,              \
>                                          coord_type coord)                     \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
>      if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
>        if (__gen_sampler_need_fix(sampler)) {                                  \
> @@ -407,11 +315,11 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
>              tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
>              return __gen_ocl_read_image ##suffix(                             \
> -                     surface_id, sampler, tmpCoord, 1);                       \
> +                     cl_image, sampler, tmpCoord, 1);                         \
>          }                                                                     \
>        }                                                                       \
>      }                                                                         \
> -    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0);  \
> +    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
>    }
> 
>  #define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type,
> \
> @@ -419,10 +327,9 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>    OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image,       \
>                                                 coord_type coord)              \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord = __gen_validate_array_index(coord, cl_image);                      \
>      return __gen_ocl_read_image ##suffix(                                     \
> -             surface_id, CLK_NORMALIZED_COORDS_FALSE |
> CLK_ADDRESS_NONE       \
> +             cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
> \
>               | CLK_FILTER_NEAREST, coord, 0);                                 \
>    }
> 
> @@ -431,15 +338,10 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>                                           coord_type coord,                    \
>                                           image_data_type color)               \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord_type fixedCoord = __gen_validate_array_index(coord, cl_image);
> \
> -    __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color);            \
> +    __gen_ocl_write_image ##suffix(cl_image, fixedCoord, color);              \
>    }
> 
> -#define int1 int
> -#define float1 float
> -
> -
>  #define DECL_IMAGE(int_clamping_fix, image_type, image_data_type,
> suffix, n)  \
>    DECL_READ_IMAGE0(int_clamping_fix, image_type,                              \
>                     image_data_type, suffix, int ##n)                          \
> @@ -495,13 +397,12 @@ INLINE_OVERLOADABLE int4
> __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
>                                          const sampler_t sampler,              \
>                                          coord_type coord)                     \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord = __gen_validate_array_index(coord, cl_image);                      \
>      if (int_clamping_fix && __gen_sampler_need_fix(sampler)) {                \
>        int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image);             \
> -      return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord,
> 2); \
> +      return __gen_ocl_read_image ##suffix(cl_image, sampler, newCoord,
> 2); \
>      }                                                                         \
> -    return  __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
> +    return  __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0);     \
>    }
> 
>  // For float coordiates
> @@ -511,7 +412,6 @@ INLINE_OVERLOADABLE int4
> __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
>                                          const sampler_t sampler,              \
>                                          coord_type coord)                     \
>    {                                                                           \
> -    GET_IMAGE(cl_image, surface_id);                                          \
>      coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
>      if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) {                          \
>        if (__gen_sampler_need_fix(sampler)) {                                  \
> @@ -523,11 +423,11 @@ INLINE_OVERLOADABLE int4
> __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
>                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
>              float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> \
>              return __gen_ocl_read_image ##suffix(                             \
> -                     surface_id, sampler, newCoord, 2);                       \
> +                     cl_image, sampler, newCoord, 2);                       \
>          }                                                                     \
>        }                                                                       \
>      }                                                                         \
> -    return  __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0);  \
> +    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
>    }
> 
>  #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> \
> @@ -547,18 +447,15 @@ DECL_IMAGE_1DArray(0, float4, f)
>  #define DECL_IMAGE_INFO_COMMON(image_type)                                    \
>    OVERLOADABLE  int get_image_channel_data_type(image_type image)
> \
>    {                                                                           \
> -    GET_IMAGE(image, surface_id);                                             \
> -    return __gen_ocl_get_image_channel_data_type(surface_id);                 \
> +    return __gen_ocl_get_image_channel_data_type(image);                 \
>    }                                                                           \
>    OVERLOADABLE  int get_image_channel_order(image_type image)
> \
>    {                                                                           \
> -    GET_IMAGE(image, surface_id);                                             \
> -    return __gen_ocl_get_image_channel_order(surface_id);                     \
> +    return __gen_ocl_get_image_channel_order(image);                     \
>    }                                                                           \
>    OVERLOADABLE int get_image_width(image_type image)                          \
>    {                                                                           \
> -    GET_IMAGE(image, surface_id);                                             \
> -    return __gen_ocl_get_image_width(surface_id);                             \
> +    return __gen_ocl_get_image_width(image);                             \
>    }
> 
>  DECL_IMAGE_INFO_COMMON(image1d_t)
> @@ -571,8 +468,7 @@ DECL_IMAGE_INFO_COMMON(image2d_array_t)
>  // 2D extra Info
>  OVERLOADABLE int get_image_height(image2d_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_height(surface_id);
> +  return __gen_ocl_get_image_height(image);
>  }
>  OVERLOADABLE int2 get_image_dim(image2d_t image)
>  {
> @@ -583,13 +479,11 @@ OVERLOADABLE int2 get_image_dim(image2d_t
> image)
>  // 3D extra Info
>  OVERLOADABLE int get_image_height(image3d_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_height(surface_id);
> +  return __gen_ocl_get_image_height(image);
>  }
>  OVERLOADABLE int get_image_depth(image3d_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_depth(surface_id);
> +  return __gen_ocl_get_image_depth(image);
>  }
>  OVERLOADABLE int4 get_image_dim(image3d_t image)
>  {
> @@ -602,8 +496,7 @@ OVERLOADABLE int4 get_image_dim(image3d_t
> image)
>  // 2D Array extra Info
>  OVERLOADABLE int get_image_height(image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_height(surface_id);
> +  return __gen_ocl_get_image_height(image);
>  }
>  OVERLOADABLE int2 get_image_dim(image2d_array_t image)
>  {
> @@ -611,14 +504,12 @@ OVERLOADABLE int2
> get_image_dim(image2d_array_t image)
>  }
>  OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_depth(surface_id);
> +  return __gen_ocl_get_image_depth(image);
>  }
> 
>  // 1D Array info
>  OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
>  {
> -  GET_IMAGE(image, surface_id);
> -  return __gen_ocl_get_image_depth(surface_id);
> +  return __gen_ocl_get_image_depth(image);
>  }
>  // End of 1DArray
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index afaa4a5..512f437 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -286,7 +286,6 @@ namespace gbe
>        case 1: return ir::MEM_GLOBAL;
>        case 2: return ir::MEM_CONSTANT;
>        case 3: return ir::MEM_LOCAL;
> -      case 4: return ir::IMAGE;
>      }
>      GBE_ASSERT(false);
>      return ir::MEM_GLOBAL;
> @@ -1557,18 +1556,13 @@ namespace gbe
> 
>          llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode-
> >getOperand(1 + argID)))->getZExtValue();
>          llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(1
> + argID)))->getString();
> -        if (llvmInfo.typeName.find("image") != std::string::npos &&
> -            llvmInfo.typeName.find("*") != std::string::npos) {
> -          uint32_t start = llvmInfo.typeName.find("image");
> -          uint32_t end = llvmInfo.typeName.find("*");
> -          llvmInfo.typeName = llvmInfo.typeName.substr(start, end - start);
> -        }
>          llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(1
> + argID)))->getString();
>          llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(1 +
> argID)))->getString();
>          llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(1 +
> argID)))->getString();
> 
>          // function arguments are uniform values.
>          this->newRegister(I, NULL, true);
> +
>          // add support for vector argument.
>          if(type->isVectorTy()) {
>            VectorType *vectorType = cast<VectorType>(type);
> @@ -1591,6 +1585,12 @@ namespace gbe
>          GBE_ASSERTM(isScalarType(type) == true,
>                      "vector type in the function argument is not supported yet");
>          const ir::Register reg = getRegister(I);
> +        if (llvmInfo.isImageType()) {
> +          ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, 4, 4,
> 0);
[Yang, Rong R] Is it safe to hard-code the size to 4?


> +          ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase());
> +          continue;
> +        }
> +
>          if (type->isPointerTy() == false)
>            ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo,
> getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
>          else {
> @@ -1625,10 +1625,6 @@ namespace gbe
>                case ir::MEM_CONSTANT:
>                  ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg,
> llvmInfo, ptrSize, align, 0x2);
>                break;
> -              case ir::IMAGE:
> -                ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo,
> ptrSize, align, 0x0);
> -                ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase());
> -              break;
>                default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
>              }
>            }
> @@ -2791,16 +2787,8 @@ namespace gbe
> 
>      // Get the name of the called function and handle it
>      const std::string fnName = Callee->getName();
> -    auto it = instrinsicMap.map.find(fnName);
> -    // FIXME, should create a complete error reporting mechanism
> -    // when found error in beignet managed passes including Gen pass.
> -    if (it == instrinsicMap.map.end()) {
> -      std::cerr << "Unresolved symbol: " << fnName << std::endl;
> -      std::cerr << "Aborting..." << std::endl;
> -      exit(-1);
> -    }
> -    GBE_ASSERT(it != instrinsicMap.map.end());
> -    switch (it->second) {
> +    auto genIntrinsicID = intrinsicMap.find(fnName);
> +    switch (genIntrinsicID) {
>        case GEN_OCL_GET_GROUP_ID0:
>          regTranslator.newScalarProxy(ir::ocl::groupid0, dst); break;
>        case GEN_OCL_GET_GROUP_ID1:
> @@ -2897,35 +2885,13 @@ namespace gbe
>        case GEN_OCL_LGBARRIER:
>          ctx.getFunction().setUseSLM(true);
>          break;
> -      case GEN_OCL_WRITE_IMAGE_I_1D:
> -      case GEN_OCL_WRITE_IMAGE_UI_1D:
> -      case GEN_OCL_WRITE_IMAGE_F_1D:
> -      case GEN_OCL_WRITE_IMAGE_I_2D:
> -      case GEN_OCL_WRITE_IMAGE_UI_2D:
> -      case GEN_OCL_WRITE_IMAGE_F_2D:
> -      case GEN_OCL_WRITE_IMAGE_I_3D:
> -      case GEN_OCL_WRITE_IMAGE_UI_3D:
> -      case GEN_OCL_WRITE_IMAGE_F_3D:
> +      case GEN_OCL_WRITE_IMAGE_I:
> +      case GEN_OCL_WRITE_IMAGE_UI:
> +      case GEN_OCL_WRITE_IMAGE_F:
>          break;
> -      case GEN_OCL_READ_IMAGE_I_1D:
> -      case GEN_OCL_READ_IMAGE_UI_1D:
> -      case GEN_OCL_READ_IMAGE_F_1D:
> -      case GEN_OCL_READ_IMAGE_I_2D:
> -      case GEN_OCL_READ_IMAGE_UI_2D:
> -      case GEN_OCL_READ_IMAGE_F_2D:
> -      case GEN_OCL_READ_IMAGE_I_3D:
> -      case GEN_OCL_READ_IMAGE_UI_3D:
> -      case GEN_OCL_READ_IMAGE_F_3D:
> -
> -      case GEN_OCL_READ_IMAGE_I_1D_I:
> -      case GEN_OCL_READ_IMAGE_UI_1D_I:
> -      case GEN_OCL_READ_IMAGE_F_1D_I:
> -      case GEN_OCL_READ_IMAGE_I_2D_I:
> -      case GEN_OCL_READ_IMAGE_UI_2D_I:
> -      case GEN_OCL_READ_IMAGE_F_2D_I:
> -      case GEN_OCL_READ_IMAGE_I_3D_I:
> -      case GEN_OCL_READ_IMAGE_UI_3D_I:
> -      case GEN_OCL_READ_IMAGE_F_3D_I:
> +      case GEN_OCL_READ_IMAGE_I:
> +      case GEN_OCL_READ_IMAGE_UI:
> +      case GEN_OCL_READ_IMAGE_F:
>        {
>          // dst is a 4 elements vector. We allocate all 4 registers here.
>          uint32_t elemNum;
> @@ -3058,11 +3024,7 @@ namespace gbe
>    }
> 
>    uint8_t GenWriter::getImageID(CallInst &I) {
> -    PtrOrigMapIter iter = pointerOrigMap.find(&I);
> -    GBE_ASSERT(iter != pointerOrigMap.end());
> -    SmallVectorImpl<Value *> &origins = iter->second;
> -    GBE_ASSERT(origins.size() == 1);
> -    const ir::Register imageReg = this->getRegister(origins[0]);
> +    const ir::Register imageReg = this->getRegister(I.getOperand(0));
>      return ctx.getFunction().getImageSet()->getIdx(imageReg);
>    }
> 
> @@ -3232,8 +3194,7 @@ namespace gbe
>          // Get the name of the called function and handle it
>          Value *Callee = I.getCalledValue();
>          const std::string fnName = Callee->getName();
> -        auto it = instrinsicMap.map.find(fnName);
> -        GBE_ASSERT(it != instrinsicMap.map.end());
> +        auto genIntrinsicID = intrinsicMap.find(fnName);
> 
>          // Get the function arguments
>          CallSite CS(&I);
> @@ -3242,7 +3203,7 @@ namespace gbe
>          CallSite::arg_iterator AE = CS.arg_end();
>  #endif /* GBE_DEBUG */
> 
> -        switch (it->second) {
> +        switch (genIntrinsicID) {
>            case GEN_OCL_POW:
>            {
>              const ir::Register src0 = this->getRegister(*AI); ++AI;
> @@ -3347,31 +3308,16 @@ namespace gbe
>              const uint8_t imageID = getImageID(I);
>              GBE_ASSERT(AI != AE); ++AI;
>              const ir::Register reg = this->getRegister(&I, 0);
> -            int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
> +            int infoType = genIntrinsicID - GEN_OCL_GET_IMAGE_WIDTH;
>              ir::ImageInfoKey key(imageID, infoType);
>              const ir::Register infoReg = ctx.getFunction().getImageSet()-
> >appendInfo(key, &ctx);
>              ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
>              break;
>            }
> 
> -          case GEN_OCL_READ_IMAGE_I_1D:
> -          case GEN_OCL_READ_IMAGE_UI_1D:
> -          case GEN_OCL_READ_IMAGE_F_1D:
> -          case GEN_OCL_READ_IMAGE_I_1D_I:
> -          case GEN_OCL_READ_IMAGE_UI_1D_I:
> -          case GEN_OCL_READ_IMAGE_F_1D_I:
> -          case GEN_OCL_READ_IMAGE_I_2D:
> -          case GEN_OCL_READ_IMAGE_UI_2D:
> -          case GEN_OCL_READ_IMAGE_F_2D:
> -          case GEN_OCL_READ_IMAGE_I_2D_I:
> -          case GEN_OCL_READ_IMAGE_UI_2D_I:
> -          case GEN_OCL_READ_IMAGE_F_2D_I:
> -          case GEN_OCL_READ_IMAGE_I_3D:
> -          case GEN_OCL_READ_IMAGE_UI_3D:
> -          case GEN_OCL_READ_IMAGE_F_3D:
> -          case GEN_OCL_READ_IMAGE_I_3D_I:
> -          case GEN_OCL_READ_IMAGE_UI_3D_I:
> -          case GEN_OCL_READ_IMAGE_F_3D_I:
> +          case GEN_OCL_READ_IMAGE_I:
> +          case GEN_OCL_READ_IMAGE_UI:
> +          case GEN_OCL_READ_IMAGE_F:
>            {
>              const uint8_t imageID = getImageID(I);
>              GBE_ASSERT(AI != AE); ++AI;
> @@ -3379,7 +3325,7 @@ namespace gbe
>              const uint8_t sampler = this->appendSampler(AI);
>              ++AI; GBE_ASSERT(AI != AE);
>              uint32_t coordNum;
> -            (void)getVectorInfo(ctx, *AI, coordNum);
> +            const ir::Type coordType = getVectorInfo(ctx, *AI, coordNum);
>              if (coordNum == 4)
>                coordNum = 3;
>              const uint32_t imageDim = coordNum;
> @@ -3396,7 +3342,7 @@ namespace gbe
>              GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> ir::TYPE_S32, "Invalid sampler type");
>              samplerOffset = x.getIntegerValue();
>  #endif
> -            bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> +            bool isFloatCoord = coordType == ir::TYPE_FLOAT;
>              bool requiredFloatCoord = samplerOffset == 0;
> 
>              vector<ir::Register> dstTupleData, srcTupleData;
> @@ -3422,7 +3368,7 @@ namespace gbe
>              }
> 
>              uint32_t elemNum;
> -            (void)getVectorInfo(ctx, &I, elemNum);
> +            ir::Type dstType = getVectorInfo(ctx, &I, elemNum);
>              GBE_ASSERT(elemNum == 4);
> 
>              for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> @@ -3432,49 +3378,14 @@ namespace gbe
>              const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
>              const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
> 
> -            ir::Type dstType = ir::TYPE_U32;
> -
> -            switch(it->second) {
> -              case GEN_OCL_READ_IMAGE_I_1D:
> -              case GEN_OCL_READ_IMAGE_UI_1D:
> -              case GEN_OCL_READ_IMAGE_I_2D:
> -              case GEN_OCL_READ_IMAGE_UI_2D:
> -              case GEN_OCL_READ_IMAGE_I_3D:
> -              case GEN_OCL_READ_IMAGE_UI_3D:
> -              case GEN_OCL_READ_IMAGE_I_1D_I:
> -              case GEN_OCL_READ_IMAGE_UI_1D_I:
> -              case GEN_OCL_READ_IMAGE_I_2D_I:
> -              case GEN_OCL_READ_IMAGE_UI_2D_I:
> -              case GEN_OCL_READ_IMAGE_I_3D_I:
> -              case GEN_OCL_READ_IMAGE_UI_3D_I:
> -                dstType = ir::TYPE_U32;
> -                break;
> -              case GEN_OCL_READ_IMAGE_F_1D:
> -              case GEN_OCL_READ_IMAGE_F_2D:
> -              case GEN_OCL_READ_IMAGE_F_3D:
> -              case GEN_OCL_READ_IMAGE_F_1D_I:
> -              case GEN_OCL_READ_IMAGE_F_2D_I:
> -              case GEN_OCL_READ_IMAGE_F_3D_I:
> -                dstType = ir::TYPE_FLOAT;
> -                break;
> -              default:
> -                GBE_ASSERT(0); // never been here.
> -            }
> -
>              ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
>                         requiredFloatCoord, sampler, samplerOffset);
>              break;
>            }
> 
> -          case GEN_OCL_WRITE_IMAGE_I_1D:
> -          case GEN_OCL_WRITE_IMAGE_UI_1D:
> -          case GEN_OCL_WRITE_IMAGE_F_1D:
> -          case GEN_OCL_WRITE_IMAGE_I_2D:
> -          case GEN_OCL_WRITE_IMAGE_UI_2D:
> -          case GEN_OCL_WRITE_IMAGE_F_2D:
> -          case GEN_OCL_WRITE_IMAGE_I_3D:
> -          case GEN_OCL_WRITE_IMAGE_UI_3D:
> -          case GEN_OCL_WRITE_IMAGE_F_3D:
> +          case GEN_OCL_WRITE_IMAGE_I:
> +          case GEN_OCL_WRITE_IMAGE_UI:
> +          case GEN_OCL_WRITE_IMAGE_F:
>            {
>              const uint8_t imageID = getImageID(I);
>              GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
> @@ -3498,7 +3409,7 @@ namespace gbe
>              }
>              ++AI; GBE_ASSERT(AI != AE);
>              uint32_t elemNum;
> -            (void)getVectorInfo(ctx, *AI, elemNum);
> +            ir::Type srcType = getVectorInfo(ctx, *AI, elemNum);
>              GBE_ASSERT(elemNum == 4);
> 
>              for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> @@ -3506,27 +3417,6 @@ namespace gbe
>                srcTupleData.push_back(reg);
>              }
>              const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
> -
> -            ir::Type srcType = ir::TYPE_U32;
> -
> -            switch(it->second) {
> -              case GEN_OCL_WRITE_IMAGE_I_1D:
> -              case GEN_OCL_WRITE_IMAGE_UI_1D:
> -              case GEN_OCL_WRITE_IMAGE_I_2D:
> -              case GEN_OCL_WRITE_IMAGE_UI_2D:
> -              case GEN_OCL_WRITE_IMAGE_I_3D:
> -              case GEN_OCL_WRITE_IMAGE_UI_3D:
> -                srcType = ir::TYPE_U32;
> -                break;
> -              case GEN_OCL_WRITE_IMAGE_F_1D:
> -              case GEN_OCL_WRITE_IMAGE_F_2D:
> -              case GEN_OCL_WRITE_IMAGE_F_3D:
> -                srcType = ir::TYPE_FLOAT;
> -                break;
> -              default:
> -                GBE_ASSERT(0); // never been here.
> -            }
> -
>              ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
>              break;
>            }
> @@ -3665,7 +3555,7 @@ namespace gbe
>              //Becasue cmp's sources are same as sel's source, so cmp instruction
> and sel
>              //instruction will be merged to one sel_cmp instruction in the gen
> selection
>              //Add two intruction here for simple.
> -            if(it->second == GEN_OCL_FMAX)
> +            if(genIntrinsicID == GEN_OCL_FMAX)
>                ctx.GE(getType(ctx, I.getType()), cmp, src0, src1);
>              else
>                ctx.LT(getType(ctx, I.getType()), cmp, src0, src1);
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> b/backend/src/llvm/llvm_gen_backend.hpp
> index ae0a818..ed7a57e 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -26,6 +26,7 @@
>  #ifndef __GBE_LLVM_GEN_BACKEND_HPP__
>  #define __GBE_LLVM_GEN_BACKEND_HPP__
> 
> +#include <cxxabi.h>
>  #include "llvm/Config/llvm-config.h"
>  #include "llvm/Pass.h"
>  #include "llvm/Analysis/LoopPass.h"
> @@ -60,10 +61,31 @@ namespace gbe
>      }
>      /*! Sort intrinsics with their names */
>      hash_map<std::string, OCLInstrinsic> map;
> +    OCLInstrinsic find(const std::string symbol) const {
> +      auto it = map.find(symbol);
> +
> +      if (it == map.end()) {
> +        int status;
> +        const char *realName = abi::__cxa_demangle(symbol.c_str(), NULL,
> NULL, &status);
> +        if (status == 0) {
> +          std::string realFnName(realName), stripName;
> +          stripName = realFnName.substr(0, realFnName.find("("));
> +          it = map.find(stripName);
> +        }
> +      }
> +      // FIXME, should create a complete error reporting mechanism
> +      // when found error in beignet managed passes including Gen pass.
> +      if (it == map.end()) {
> +        std::cerr << "Unresolved symbol: " << symbol << std::endl;
> +        std::cerr << "Aborting..." << std::endl;
> +        exit(-1);
> +      }
> +      return it->second;
> +    }
>    };
> 
>    /*! Sort the OCL Gen instrinsic functions (built on pre-main) */
> -  static const OCLIntrinsicMap instrinsicMap;
> +  static const OCLIntrinsicMap intrinsicMap;
> 
>    /*! Pad the offset */
>    int32_t getPadding(int32_t offset, int32_t align);
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 8d55c3f..8ec8336 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -46,38 +46,14 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8,
> __gen_ocl_force_simd8)
>  DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
> 
>  // To read_image functions.
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D,
> _Z21__gen_ocl_read_imageijtfj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D,
> _Z22__gen_ocl_read_imageuijtfj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D,
> _Z21__gen_ocl_read_imagefjtfj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D,
> _Z21__gen_ocl_read_imageijtDv2_fj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D,
> _Z22__gen_ocl_read_imageuijtDv2_fj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D,
> _Z21__gen_ocl_read_imagefjtDv2_fj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D,
> _Z21__gen_ocl_read_imageijtDv4_fj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D,
> _Z22__gen_ocl_read_imageuijtDv4_fj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D,
> _Z21__gen_ocl_read_imagefjtDv4_fj)
> -// work around read image with the LD message. The coords are integer
> type.
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I,
> _Z21__gen_ocl_read_imageijtij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I,
> _Z22__gen_ocl_read_imageuijtij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I,
> _Z21__gen_ocl_read_imagefjtij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I,
> _Z21__gen_ocl_read_imageijtDv2_ij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I,
> _Z22__gen_ocl_read_imageuijtDv2_ij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I,
> _Z21__gen_ocl_read_imagefjtDv2_ij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> _Z21__gen_ocl_read_imageijtDv4_ij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> _Z22__gen_ocl_read_imageuijtDv4_ij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> _Z21__gen_ocl_read_imagefjtDv4_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, __gen_ocl_read_imagei)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, __gen_ocl_read_imageui)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, __gen_ocl_read_imagef)
> 
>  // To write_image functions.
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D,
> _Z22__gen_ocl_write_imageijiDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D,
> _Z23__gen_ocl_write_imageuijiDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D,
> _Z22__gen_ocl_write_imagefjiDv4_f)
> -
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D,
> _Z22__gen_ocl_write_imageijDv2_iDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D,
> _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D,
> _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
> -
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D,
> _Z22__gen_ocl_write_imageijDv4_iS_)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D,
> _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D,
> _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, __gen_ocl_write_imagei)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI,
> __gen_ocl_write_imageui)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, __gen_ocl_write_imagef)
> 
>  // To get image info function
>  DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH,
> __gen_ocl_get_image_width)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index baf526b..cf2939d 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -636,42 +636,17 @@ namespace gbe {
>        } else {
>          Value *Callee = call->getCalledValue();
>          const std::string fnName = Callee->getName();
> -        auto it = instrinsicMap.map.find(fnName);
> -        // FIXME, should create a complete error reporting mechanism
> -        // when found error in beignet managed passes including Gen pass.
> -        if (it == instrinsicMap.map.end()) {
> -          std::cerr << "Unresolved symbol: " << fnName << std::endl;
> -          std::cerr << "Aborting..." << std::endl;
> -          exit(-1);
> -        }
> -        GBE_ASSERT(it != instrinsicMap.map.end());
> +        auto genIntrinsicID = intrinsicMap.find(fnName);
> 
>          // Get the function arguments
>          CallSite CS(call);
>          CallSite::arg_iterator CI = CS.arg_begin() + 1;
> 
> -        switch (it->second) {
> +        switch (genIntrinsicID) {
>            default: break;
> -          case GEN_OCL_READ_IMAGE_I_1D:
> -          case GEN_OCL_READ_IMAGE_UI_1D:
> -          case GEN_OCL_READ_IMAGE_F_1D:
> -          case GEN_OCL_READ_IMAGE_I_2D:
> -          case GEN_OCL_READ_IMAGE_UI_2D:
> -          case GEN_OCL_READ_IMAGE_F_2D:
> -          case GEN_OCL_READ_IMAGE_I_3D:
> -          case GEN_OCL_READ_IMAGE_UI_3D:
> -          case GEN_OCL_READ_IMAGE_F_3D:
> -          case GEN_OCL_READ_IMAGE_I_1D_I:
> -          case GEN_OCL_READ_IMAGE_UI_1D_I:
> -          case GEN_OCL_READ_IMAGE_F_1D_I:
> -          case GEN_OCL_READ_IMAGE_I_2D_I:
> -          case GEN_OCL_READ_IMAGE_UI_2D_I:
> -          case GEN_OCL_READ_IMAGE_F_2D_I:
> -          case GEN_OCL_READ_IMAGE_I_3D_I:
> -          case GEN_OCL_READ_IMAGE_UI_3D_I:
> -          case GEN_OCL_READ_IMAGE_F_3D_I:
> -          case GEN_OCL_GET_IMAGE_WIDTH:
> -          case GEN_OCL_GET_IMAGE_HEIGHT:
> +          case GEN_OCL_READ_IMAGE_I:
> +          case GEN_OCL_READ_IMAGE_UI:
> +          case GEN_OCL_READ_IMAGE_F:
>            {
>              ++CI;
>              if ((*CI)->getType()->isVectorTy())
> @@ -680,15 +655,9 @@ namespace gbe {
>              extractFromVector(call);
>              break;
>            }
> -          case GEN_OCL_WRITE_IMAGE_I_3D:
> -          case GEN_OCL_WRITE_IMAGE_UI_3D:
> -          case GEN_OCL_WRITE_IMAGE_F_3D:
> -          case GEN_OCL_WRITE_IMAGE_I_2D:
> -          case GEN_OCL_WRITE_IMAGE_UI_2D:
> -          case GEN_OCL_WRITE_IMAGE_F_2D:
> -          case GEN_OCL_WRITE_IMAGE_I_1D:
> -          case GEN_OCL_WRITE_IMAGE_UI_1D:
> -          case GEN_OCL_WRITE_IMAGE_F_1D:
> +          case GEN_OCL_WRITE_IMAGE_I:
> +          case GEN_OCL_WRITE_IMAGE_UI:
> +          case GEN_OCL_WRITE_IMAGE_F:
>            {
>              if ((*CI)->getType()->isVectorTy())
>                *CI = InsertToVector(call, *CI);
> --
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list