[Beignet] [PATCH] Refactor all image builtin functions.
Yang, Rong R
rong.r.yang at intel.com
Wed Dec 17 23:39:13 PST 2014
This patch LGTM, thanks.
> -----Original Message-----
> From: Gong, Zhigang
> Sent: Wednesday, December 17, 2014 09:42
> To: beignet at lists.freedesktop.org
> Cc: Yang, Rong R; Gong, Zhigang
> Subject: [PATCH] Refactor all image builtin functions.
>
> Refactor almost all the image builtin related functions to simplify the code
> and get rid of most of the awful macros.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/libocl/src/ocl_image.cl | 811 ++++++++++++++++++---------
> --
> backend/src/llvm/llvm_gen_backend.cpp | 174 ++++---
> backend/src/llvm/llvm_gen_ocl_function.hxx | 36 +-
> backend/src/llvm/llvm_scalarize.cpp | 13 +-
> 4 files changed, 618 insertions(+), 416 deletions(-)
>
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index fd421bf..95b98ff 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -20,29 +20,90 @@
> #include "ocl_integer.h"
> #include "ocl_common.h"
>
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Beignet builtin functions.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +
> // 1D read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + float u, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + int u, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + float u, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + int u, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + float u, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + int u, uint sampler_offset);
>
> // 2D & 1D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, int v, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + float2 coord, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + int2 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + float2 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + int2 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + float2 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + int2 coord, uint sampler_offset);
>
> // 3D & 2D Array read
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, float u, float v, float w, uint sampler_offset);
> -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler, int u, int v, int w, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + float4 coord, uint sampler_offset);
> +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t
> sampler,
> + int4 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + float4 coord, uint sampler_offset);
> +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t
> sampler,
> + int4 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + float4 coord, uint sampler_offset);
> +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t
> sampler,
> + int4 coord, uint sampler_offset);
> +
> +// Don't know why we need to support 3 component coordinates, but it's in
> the old
> +// version, let's keep to support it.
> +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> + float3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imagei(surface_id, sampler,
> + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id,
> sampler_t sampler,
> + int3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imagei(surface_id, sampler,
> + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> + float3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imageui(surface_id, sampler,
> + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id,
> sampler_t sampler,
> + int3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imageui(surface_id, sampler,
> + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> + float3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imagef(surface_id, sampler,
> + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
> +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id,
> sampler_t sampler,
> + int3 coord, uint sampler_offset)
> +{
> + return __gen_ocl_read_imagef(surface_id, sampler,
> + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
> +}
>
> // 1D write
> OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4
> color);
> @@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint
> surface_id, int u, uint4 color);
> OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4
> color);
>
> // 2D & 1D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v,
> int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v,
> uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v,
> float4 color);
> +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord,
> int4 color);
> +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord,
> uint4 color);
> +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord,
> float4 color);
>
> // 3D & 2D Array write
> -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v,
> int w, int4 color);
> -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v,
> int w, uint4 color);
> -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v,
> int w, float4 color);
> +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord,
> int4 color);
> +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord,
> uint4 color);
> +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord,
> float4 color);
> +
> +INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3
> coord, int4 color)
> +{
> + __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0),
> color);
> +}
> +INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id,
> int3 coord, uint4 color)
> +{
> + __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2,
> 0), color);
> +}
> +INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3
> coord, float4 color)
> +{
> + __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2,
> 0), color);
> +}
>
> int __gen_ocl_get_image_width(uint surface_id);
> int __gen_ocl_get_image_height(uint surface_id);
> @@ -65,225 +139,436 @@ int
> __gen_ocl_get_image_channel_data_type(uint surface_id);
> int __gen_ocl_get_image_channel_order(uint surface_id);
> int __gen_ocl_get_image_depth(uint surface_id);
>
> -// 2D 3D Image Common Macro
> -#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> -#define GEN_FIX_1 1
> -#else
> -#define GEN_FIX_1 0
> -#endif
>
> #define GET_IMAGE(cl_image, surface_id) \
> uint surface_id = (uint)cl_image
> -OVERLOADABLE float __gen_compute_array_index(const float index,
> image1d_array_t image)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// helper functions to validate array index.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord,
> image1d_array_t image)
> {
> GET_IMAGE(image, surface_id);
> float array_size = __gen_ocl_get_image_depth(surface_id);
> - return clamp(rint(index), 0.f, array_size - 1.f);
> + coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
> + return coord;
> }
>
> -OVERLOADABLE float __gen_compute_array_index(float index,
> image2d_array_t image)
> +INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord,
> image2d_array_t image)
> {
> GET_IMAGE(image, surface_id);
> float array_size = __gen_ocl_get_image_depth(surface_id);
> - return clamp(rint(index), 0.f, array_size - 1.f);
> + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
> + return coord;
> }
>
> -OVERLOADABLE int __gen_compute_array_index(int index,
> image1d_array_t image)
> +INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord,
> image2d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + float array_size = __gen_ocl_get_image_depth(surface_id);
> + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
> + return coord;
> +}
> +
> +INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord,
> image1d_array_t image)
> {
> GET_IMAGE(image, surface_id);
> int array_size = __gen_ocl_get_image_depth(surface_id);
> - return clamp(index, 0, array_size - 1);
> + coord.s1 = clamp(coord.s1, 0, array_size - 1);
> + return coord;
> }
>
> -OVERLOADABLE int __gen_compute_array_index(int index,
> image2d_array_t image)
> +INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord,
> image2d_array_t image)
> {
> GET_IMAGE(image, surface_id);
> int array_size = __gen_ocl_get_image_depth(surface_id);
> - return clamp(index, 0, array_size - 1);
> -}
> -
> -#define DECL_READ_IMAGE0(int_clamping_fix, \
> - image_type, type, suffix, coord_type, n) \
> - OVERLOADABLE type read_image ##suffix(image_type cl_image, \
> - const sampler_t sampler, \
> - coord_type coord) \
> - { \
> - GET_IMAGE(cl_image, surface_id); \
> - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \
> - if (int_clamping_fix && \
> - ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
> \
> - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \
> - return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORD(surface_id, sampler, coord)); \
> - return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \
> - }
> + coord.s2 = clamp(coord.s2, 0, array_size - 1);
> + return coord;
> +}
>
> -#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix,
> \
> - image_type, type, suffix, coord_type, n) \
> - OVERLOADABLE type read_image ##suffix(image_type cl_image, \
> - const sampler_t sampler, \
> - coord_type coord) \
> - { \
> - GET_IMAGE(cl_image, surface_id); \
> - GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \
> - coord_type tmpCoord = coord; \
> - if (float_coord_rounding_fix | int_clamping_fix) { \
> - if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> \
> - && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
> - if (float_coord_rounding_fix \
> - && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
> - FIXUP_FLOAT_COORD(tmpCoord); \
> - } \
> - if (int_clamping_fix) { \
> - coord_type intCoord; \
> - if (sampler & CLK_NORMALIZED_COORDS_TRUE) { \
> - DENORMALIZE_COORD(surface_id, intCoord, tmpCoord); \
> - } else \
> - intCoord = tmpCoord; \
> - return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
> - } \
> - } \
> - } \
> - return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
> - }
> +INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord,
> image2d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + int array_size = __gen_ocl_get_image_depth(surface_id);
> + coord.s2 = clamp(coord.s2, 0, array_size - 1);
> + return coord;
> +}
> +
> +// For non array image type, we need to do nothing.
> +#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
> +INLINE_OVERLOADABLE coord_type
> __gen_validate_array_index(coord_type coord, image_type image) \
> +{ \
> + return coord; \
> +}
> +
> +GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
> +GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
> +GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Helper functions to work around some coordinate boundary issues.
> +// The major issue on Gen7/Gen7.5 is that the sample message cannot
> sample
> +// integer type surfaces correctly with CLK_ADDRESS_CLAMP and
> CLK_FILTER_NEAREST.
> +// The workaround is to use an LD message instead of the normal sample
> message.
> +/////////////////////////////////////////////////////////////////////////////
> //
> +bool __gen_sampler_need_fix(const sampler_t sampler)
> +{
> + return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> + ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> +}
> +
> +bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
> +{
> + return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> +}
> +
> +
> +INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
> +{
> + if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
> + tmpCoord += -0x1p-9f;
> + return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord)
> +{
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> + tmpCoord.s0 += -0x1p-9f;
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> + tmpCoord.s1 += -0x1p-9f;
> + return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord)
> +{
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> + tmpCoord.s0 += -0x1p-9f;
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> + tmpCoord.s1 += -0x1p-9f;
> + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
> + tmpCoord.s2 += -0x1p-9f;
> + return tmpCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
> +{
> + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
> + tmpCoord.s0 += -0x1p-9f;
> + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
> + tmpCoord.s1 += -0x1p-9f;
> + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
> + tmpCoord.s2 += -0x1p-9f;
> + return tmpCoord;
> +}
> +
> +// Functions to denormalize coordinates; needed when we have to use the
> LD
> +// message (sampler offset is non-zero) and the coordinates are normalized
> +// coordinates.
> +INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t
> image, float srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + return srcCoord * __gen_ocl_get_image_width(surface_id);
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const
> image1d_array_t image, float2 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float __gen_denormalize_coord(const
> image1d_buffer_t image, float srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + return srcCoord * __gen_ocl_get_image_width(surface_id);
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t
> image, float2 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> + return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const
> image2d_array_t image, float3 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> + return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t
> image, float3 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> + return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const
> image2d_array_t image, float4 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> + return srcCoord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t
> image, float4 srcCoord)
> +{
> + GET_IMAGE(image, surface_id);
> + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
> + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
> + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
> + return srcCoord;
> +}
> +
> +// After denormalize, we have to fixup the negative boundary.
> +INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord)
> +{
> + return coord < 0 ? -1 : coord;
> +}
> +
> +INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord)
> +{
> + coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> + coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> + return coord;
> +}
> +
> +INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord)
> +{
> + coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> + coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> + coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
> + return coord;
> +}
>
> -#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix,
> coord_type, n) \
> - OVERLOADABLE type read_image ##suffix(image_type cl_image, \
> - coord_type coord) \
> - { \
> - GET_IMAGE(cl_image, surface_id); \
> - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \
> - return __gen_ocl_read_image ##suffix( \
> - EXPEND_READ_COORDF(surface_id, \
> - CLK_NORMALIZED_COORDS_FALSE \
> - | CLK_ADDRESS_NONE \
> - | CLK_FILTER_NEAREST, (float)coord), 0); \
> +INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
> +{
> + coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> + coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
> + coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
> + return coord;
> +}
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Built-in Image Read/Write Functions
> +/////////////////////////////////////////////////////////////////////////////
> //
> +
> +// 2D 3D Image Common Macro
> +#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> +#define GEN_FIX_FLOAT_ROUNDING 1
> +#define GEN_FIX_INT_CLAMPING 1
> +#else
> +#define GEN_FIX_FLOAT_ROUNDING 0
> +#define GEN_FIX_INT_CLAMPING 0
> +#endif
> +
> +// For integer coordinates
> +#define DECL_READ_IMAGE0(int_clamping_fix, image_type, \
> + image_data_type, suffix, coord_type) \
> + OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image, \
> + const sampler_t sampler, \
> + coord_type coord) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord = __gen_validate_array_index(coord, cl_image); \
> + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1);
> \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
> }
>
> -#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
> - OVERLOADABLE void write_image ##suffix(image_type cl_image,
> coord_type coord, type color)\
> - {\
> - GET_IMAGE(cl_image, surface_id);\
> - __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id,
> coord, color));\
> +// For float coordinates
> +#define DECL_READ_IMAGE1(int_clamping_fix, image_type, \
> + image_data_type, suffix, coord_type) \
> + OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image, \
> + const sampler_t sampler, \
> + coord_type coord) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
> + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
> + if (__gen_sampler_need_fix(sampler)) { \
> + if (GEN_FIX_FLOAT_ROUNDING && \
> + __gen_sampler_need_rounding_fix(sampler)) \
> + tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> + if (int_clamping_fix) { \
> + if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> + tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> + tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
> + return __gen_ocl_read_image ##suffix( \
> + surface_id, sampler, tmpCoord, 1); \
> + } \
> + } \
> + } \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0); \
> }
>
> -#define DECL_IMAGE_INFO_COMMON(image_type) \
> - OVERLOADABLE int get_image_channel_data_type(image_type image)\
> - { \
> - GET_IMAGE(image, surface_id);\
> - return __gen_ocl_get_image_channel_data_type(surface_id); \
> - }\
> - OVERLOADABLE int get_image_channel_order(image_type image)\
> - { \
> - GET_IMAGE(image, surface_id);\
> - return __gen_ocl_get_image_channel_order(surface_id); \
> - } \
> - OVERLOADABLE int get_image_width(image_type image) \
> - { \
> - GET_IMAGE(image, surface_id); \
> - return __gen_ocl_get_image_width(surface_id); \
> +#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type,
> \
> + suffix, coord_type) \
> + OVERLOADABLE image_data_type read_image ##suffix(image_type
> cl_image, \
> + coord_type coord) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord = __gen_validate_array_index(coord, cl_image); \
> + return __gen_ocl_read_image ##suffix( \
> + surface_id, CLK_NORMALIZED_COORDS_FALSE |
> CLK_ADDRESS_NONE \
> + | CLK_FILTER_NEAREST, coord, 0); \
> }
>
> -// 1D
> -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix)
> \
> - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1)
> \
> - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type,
> suffix, float, 1) \
> - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1)
> \
> - DECL_WRITE_IMAGE(image_type, type, suffix, int) \
> - DECL_WRITE_IMAGE(image_type, type, suffix, float)
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord
> < 0 ? -1 : coord), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord =
> srcCoord * __gen_ocl_get_image_width(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
> -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord) \
> - { \
> - if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
> - tmpCoord += -0x1p-9f; \
> +#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> coord_type) \
> + OVERLOADABLE void write_image ##suffix(image_type cl_image, \
> + coord_type coord, \
> + image_data_type color) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord_type fixedCoord = __gen_validate_array_index(coord, cl_image);
> \
> + __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \
> }
>
> -DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
> -DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
> -DECL_IMAGE(0, image1d_t, float4, f)
> -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i)
> -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui)
> -DECL_IMAGE(0, image1d_buffer_t, float4, f)
> +#define int1 int
> +#define float1 float
>
> -// 1D Info
> -DECL_IMAGE_INFO_COMMON(image1d_t)
> -DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
>
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef DECL_IMAGE
> -// End of 1D
> -
> -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n)
> \
> - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n)
> \
> - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type,
> suffix, float ##n, n) \
> - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n)
> \
> - DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
> - DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
> -// 2D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler,
> (int)(coord.s0 < 0 ? -1 : coord.s0), \
> - (int)(coord.s1 < 0 ? -1 : coord.s1), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> - dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord) \
> - { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9f; \
> - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> - tmpCoord.s1 += -0x1p-9f; \
> +#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type,
> suffix, n) \
> + DECL_READ_IMAGE0(int_clamping_fix, image_type, \
> + image_data_type, suffix, int ##n) \
> + DECL_READ_IMAGE1(int_clamping_fix, image_type, \
> + image_data_type, suffix, float ##n) \
> + DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix,
> int ##n) \
> + DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n)
> \
> +
> +// 1D
> +#define DECL_IMAGE_TYPE(image_type, n) \
> + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n) \
> + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n)
> \
> + DECL_IMAGE(0, image_type, float4, f, n)
> +
> +DECL_IMAGE_TYPE(image1d_t, 1)
> +DECL_IMAGE_TYPE(image1d_buffer_t, 1)
> +DECL_IMAGE_TYPE(image2d_t, 2)
> +DECL_IMAGE_TYPE(image3d_t, 4)
> +DECL_IMAGE_TYPE(image3d_t, 3)
> +DECL_IMAGE_TYPE(image2d_array_t, 4)
> +DECL_IMAGE_TYPE(image2d_array_t, 3)
> +
> +// For 1D Array:
> +// fixup_1darray_coord functions convert a 1d array coord to a 2d array
> coord
> +// and the caller must set the sampler offset to 2 when using this converted
> coord.
> +// It is used to work around an image 1d array restriction which could not
> set
> +// ai in the LD message. We solve it by faking the same image as a 2D array,
> and
> +// then accessing it by LD message as a 3D surface, treating the ai as the w
> coordinate.
> +INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord,
> image1d_array_t image)
> +{
> + float4 newCoord;
> + newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0;
> + newCoord.s1 = 0;
> + newCoord.s2 = coord.s1;
> + newCoord.s3 = 0;
> + return newCoord;
> +}
> +
> +INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord,
> image1d_array_t image)
> +{
> + int4 newCoord;
> + newCoord.s0 = coord.s0;
> + newCoord.s1 = 0;
> + newCoord.s2 = coord.s1;
> + newCoord.s3 = 0;
> + return newCoord;
> +}
> +
> +// For integer coordinates
> +#define DECL_READ_IMAGE0_1DArray(int_clamping_fix, \
> + image_data_type, suffix, coord_type) \
> + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t
> cl_image, \
> + const sampler_t sampler, \
> + coord_type coord) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord = __gen_validate_array_index(coord, cl_image); \
> + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \
> + int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord,
> 2); \
> + } \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0);
> \
> }
>
> -DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
> -DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
> -DECL_IMAGE(0, image2d_t, float4, f, 2)
> -
> -// 1D Array
> -#undef GET_IMAGE_ARRAY_SIZE
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> (int)0, ai, 2
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)ai
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler,
> (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0,
> __gen_compute_array_index(coord.s1, cl_image), color
> -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> - coord_type ai = __gen_compute_array_index(coord.s1, image);
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord) \
> - { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9f; \
> +// For float coordinates
> +#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,
> \
> + suffix, coord_type) \
> + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t
> cl_image, \
> + const sampler_t sampler, \
> + coord_type coord) \
> + { \
> + GET_IMAGE(cl_image, surface_id); \
> + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image);
> \
> + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
> + if (__gen_sampler_need_fix(sampler)) { \
> + if (GEN_FIX_FLOAT_ROUNDING && \
> + __gen_sampler_need_rounding_fix(sampler)) \
> + tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> + if (int_clamping_fix) { \
> + if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> + tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> + float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> \
> + return __gen_ocl_read_image ##suffix( \
> + surface_id, sampler, newCoord, 2); \
> + } \
> + } \
> + } \
> + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord,
> 0); \
> }
>
> -DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
> -DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
> -DECL_IMAGE(0, image1d_array_t, float4, f, 2)
> +#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> \
> + DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix,
> int2) \
> + DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type,
> \
> + suffix, float2) \
> + DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type,
> suffix, int2) \
> + DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2)
> \
> +
> +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i)
> +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui)
> +DECL_IMAGE_1DArray(0, float4, f)
> +
> +/////////////////////////////////////////////////////////////////////////////
> //
> +// Built-in Image Query Functions
> +/////////////////////////////////////////////////////////////////////////////
> //
> +#define DECL_IMAGE_INFO_COMMON(image_type) \
> + OVERLOADABLE int get_image_channel_data_type(image_type image)
> \
> + { \
> + GET_IMAGE(image, surface_id); \
> + return __gen_ocl_get_image_channel_data_type(surface_id); \
> + } \
> + OVERLOADABLE int get_image_channel_order(image_type image)
> \
> + { \
> + GET_IMAGE(image, surface_id); \
> + return __gen_ocl_get_image_channel_order(surface_id); \
> + } \
> + OVERLOADABLE int get_image_width(image_type image) \
> + { \
> + GET_IMAGE(image, surface_id); \
> + return __gen_ocl_get_image_width(surface_id); \
> + }
>
> -// 2D Info
> +DECL_IMAGE_INFO_COMMON(image1d_t)
> +DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
> +DECL_IMAGE_INFO_COMMON(image1d_array_t)
> DECL_IMAGE_INFO_COMMON(image2d_t)
> +DECL_IMAGE_INFO_COMMON(image3d_t)
> +DECL_IMAGE_INFO_COMMON(image2d_array_t)
> +
> +// 2D extra Info
> OVERLOADABLE int get_image_height(image2d_t image)
> {
> GET_IMAGE(image, surface_id);
> @@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t
> image)
> {
> return (int2){get_image_width(image), get_image_height(image)};
> }
> +// End of 2D
>
> -// 1D Array info
> -DECL_IMAGE_INFO_COMMON(image1d_array_t)
> -OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> -{
> - GET_IMAGE(image, surface_id);
> - return __gen_ocl_get_image_depth(surface_id);
> -}
> -
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDI
> -#undef EXPEND_READ_COORDF
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -// End of 2D and 1D Array
> -
> -// 3D
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, coord.s2, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1, (float)coord.s2
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)
> (coord.s0 < 0 ? -1 : coord.s0), \
> - (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ?
> -1 : coord.s2), 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> - dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id); \
> - dstCoord.z = srcCoord.z *
> __gen_ocl_get_image_depth(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> coord.s2, color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord) \
> - { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9f; \
> - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> - tmpCoord.s1 += -0x1p-9f; \
> - if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \
> - tmpCoord.s2 += -0x1p-9f; \
> - }
> -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
> -
> -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
> -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
> -DECL_IMAGE(0, image3d_t, float4, f, 4)
> -
> -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
> -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
> -DECL_IMAGE(0, image3d_t, float4, f, 3)
> -
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -
> -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0,
> coord.s1, ai, 1
> -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler,
> (float)coord.s0, (float)coord.s1, (float)ai
> -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)
> (coord.s0 < 0 ? -1 : coord.s0), \
> - (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
> -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x =
> srcCoord.x * __gen_ocl_get_image_width(id); \
> - dstCoord.y = srcCoord.y *
> __gen_ocl_get_image_height(id);
> -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1,
> __gen_compute_array_index(coord.s2, cl_image), color
> -
> -#define FIXUP_FLOAT_COORD(tmpCoord) \
> - { \
> - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
> - tmpCoord.s0 += -0x1p-9f; \
> - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
> - tmpCoord.s1 += -0x1p-9f; \
> - }
> -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
> - coord_type ai = __gen_compute_array_index(coord.s2, image);
> -
> -// 2D Array
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
> -DECL_IMAGE(0, image2d_array_t, float4, f, 4)
> -
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3)
> -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3)
> -DECL_IMAGE(0, image2d_array_t, float4, f, 3)
> -
> -// 3D Info
> -DECL_IMAGE_INFO_COMMON(image3d_t)
> +// 3D extra Info
> OVERLOADABLE int get_image_height(image3d_t image)
> {
> GET_IMAGE(image, surface_id);
> @@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t
> image)
> }
> OVERLOADABLE int4 get_image_dim(image3d_t image)
> {
> - return (int4){get_image_width(image), get_image_height(image),
> get_image_depth(image), 0};
> + return (int4) (get_image_width(image),
> + get_image_height(image),
> + get_image_depth(image),
> + 0);
> }
>
> -// 2D Array Info
> -DECL_IMAGE_INFO_COMMON(image2d_array_t)
> +// 2D Array extra Info
> OVERLOADABLE int get_image_height(image2d_array_t image)
> {
> GET_IMAGE(image, surface_id);
> @@ -409,21 +615,10 @@ OVERLOADABLE size_t
> get_image_array_size(image2d_array_t image)
> return __gen_ocl_get_image_depth(surface_id);
> }
>
> -#undef EXPEND_READ_COORD
> -#undef EXPEND_READ_COORDF
> -#undef EXPEND_READ_COORDI
> -#undef DENORMALIZE_COORD
> -#undef EXPEND_WRITE_COORD
> -#undef FIXUP_FLOAT_COORD
> -#undef GET_IMAGE_ARRAY_SIZE
> -// End of 3D and 2D Array
> -
> -#undef DECL_IMAGE
> -#undef DECL_READ_IMAGE
> -#undef DECL_READ_IMAGE_NOSAMPLER
> -#undef DECL_WRITE_IMAGE
> -#undef GEN_FIX_1
> -// End of Image
> -
> -
> -#undef GET_IMAGE
> +// 1D Array info
> +OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
> +{
> + GET_IMAGE(image, surface_id);
> + return __gen_ocl_get_image_depth(surface_id);
> +}
> +// End of 1DArray
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index a438f09..afaa4a5 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -257,9 +257,10 @@ namespace gbe
> /*! Get number of element to process dealing either with a vector or a
> scalar
> * value
> */
> - static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value
> *value, uint32_t &elemNum, bool useUnsigned = false)
> + static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t
> &elemNum, bool useUnsigned = false)
> {
> ir::Type type;
> + Type *llvmType = value->getType();
> if (llvmType->isVectorTy() == true) {
> VectorType *vectorType = cast<VectorType>(llvmType);
> Type *elementType = vectorType->getElementType();
> @@ -629,6 +630,7 @@ namespace gbe
> void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
>
> uint8_t appendSampler(CallSite::arg_iterator AI);
> + uint8_t getImageID(CallInst &I);
>
> // These instructions are not supported at all
> void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
> @@ -2526,8 +2528,8 @@ namespace gbe
> Value *srcValue = I.getOperand(0);
> Value *dstValue = &I;
> uint32_t srcElemNum = 0, dstElemNum = 0 ;
> - ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue,
> srcElemNum);
> - ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue,
> dstElemNum);
> + ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum);
> + ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum);
> // As long and double are not compatible in register storage
> // and we do not support double yet, simply put an assert here
> GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType ==
> ir::TYPE_DOUBLE));
> @@ -2927,7 +2929,7 @@ namespace gbe
> {
> // dst is a 4 elements vector. We allocate all 4 registers here.
> uint32_t elemNum;
> - (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
> + (void)getVectorInfo(ctx, &I, elemNum);
> GBE_ASSERT(elemNum == 4);
> this->newRegister(&I);
> break;
> @@ -3055,6 +3057,15 @@ namespace gbe
> return index;
> }
>
> + uint8_t GenWriter::getImageID(CallInst &I) {
> + PtrOrigMapIter iter = pointerOrigMap.find(&I);
> + GBE_ASSERT(iter != pointerOrigMap.end());
> + SmallVectorImpl<Value *> &origins = iter->second;
> + GBE_ASSERT(origins.size() == 1);
> + const ir::Register imageReg = this->getRegister(origins[0]);
> + return ctx.getFunction().getImageSet()->getIdx(imageReg);
> + }
> +
> void GenWriter::emitCallInst(CallInst &I) {
> if (Function *F = I.getCalledFunction()) {
> if (F->getIntrinsicID() != 0) {
> @@ -3218,7 +3229,6 @@ namespace gbe
> default: NOT_IMPLEMENTED;
> }
> } else {
> - int image_dim;
> // Get the name of the called function and handle it
> Value *Callee = I.getCalledValue();
> const std::string fnName = Callee->getName();
> @@ -3334,13 +3344,13 @@ namespace gbe
> case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
> case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
> {
> - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> + const uint8_t imageID = getImageID(I);
> + GBE_ASSERT(AI != AE); ++AI;
> const ir::Register reg = this->getRegister(&I, 0);
> int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
> - const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> - ir::ImageInfoKey key(surfaceID, infoType);
> + ir::ImageInfoKey key(imageID, infoType);
> const ir::Register infoReg = ctx.getFunction().getImageSet()-
> >appendInfo(key, &ctx);
> - ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
> + ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
> break;
> }
>
> @@ -3350,69 +3360,75 @@ namespace gbe
> case GEN_OCL_READ_IMAGE_I_1D_I:
> case GEN_OCL_READ_IMAGE_UI_1D_I:
> case GEN_OCL_READ_IMAGE_F_1D_I:
> - image_dim = 1;
> - goto handle_read_image;
> case GEN_OCL_READ_IMAGE_I_2D:
> case GEN_OCL_READ_IMAGE_UI_2D:
> case GEN_OCL_READ_IMAGE_F_2D:
> case GEN_OCL_READ_IMAGE_I_2D_I:
> case GEN_OCL_READ_IMAGE_UI_2D_I:
> case GEN_OCL_READ_IMAGE_F_2D_I:
> - image_dim = 2;
> - goto handle_read_image;
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> case GEN_OCL_READ_IMAGE_F_3D:
> case GEN_OCL_READ_IMAGE_I_3D_I:
> case GEN_OCL_READ_IMAGE_UI_3D_I:
> case GEN_OCL_READ_IMAGE_F_3D_I:
> - image_dim = 3;
> -handle_read_image:
> {
> - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> - const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> + const uint8_t imageID = getImageID(I);
> + GBE_ASSERT(AI != AE); ++AI;
> GBE_ASSERT(AI != AE);
> const uint8_t sampler = this->appendSampler(AI);
> - ++AI;
> -
> - ir::Register ucoord;
> - ir::Register vcoord;
> - ir::Register wcoord;
> -
> - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
> - if (image_dim > 1) {
> - GBE_ASSERT(AI != AE);
> - vcoord = this->getRegister(*AI);
> - ++AI;
> - } else {
> - vcoord = ir::ocl::invalid;
> - }
> -
> - if (image_dim > 2) {
> - GBE_ASSERT(AI != AE);
> - wcoord = this->getRegister(*AI);
> - ++AI;
> - } else {
> - wcoord = ir::ocl::invalid;
> - }
> + ++AI; GBE_ASSERT(AI != AE);
> + uint32_t coordNum;
> + (void)getVectorInfo(ctx, *AI, coordNum);
> + if (coordNum == 4)
> + coordNum = 3;
> + const uint32_t imageDim = coordNum;
> + GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
>
> - vector<ir::Register> dstTupleData, srcTupleData;
> - const uint32_t elemNum = 4;
> - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> - const ir::Register reg = this->getRegister(&I, elemID);
> - dstTupleData.push_back(reg);
> - }
> - srcTupleData.push_back(ucoord);
> - srcTupleData.push_back(vcoord);
> - srcTupleData.push_back(wcoord);
> uint8_t samplerOffset = 0;
> + Value *coordVal = *AI;
> + ++AI; GBE_ASSERT(AI != AE);
> + Value *samplerOffsetVal = *AI;
> #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
> - GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
> + Constant *CPV = dyn_cast<Constant>(samplerOffsetVal);
> assert(CPV);
> const ir::Immediate &x = processConstantImm(CPV);
> GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> ir::TYPE_S32, "Invalid sampler type");
> samplerOffset = x.getIntegerValue();
> #endif
> + bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> + bool requiredFloatCoord = samplerOffset == 0;
> +
> + vector<ir::Register> dstTupleData, srcTupleData;
> + for (uint32_t elemID = 0; elemID < 3; elemID++) {
> + ir::Register reg;
> +
> + if (elemID < imageDim)
> + reg = this->getRegister(coordVal, elemID);
> + else
> + reg = ir::ocl::invalid;
> +
> + if (isFloatCoord == requiredFloatCoord)
> + srcTupleData.push_back(reg);
> + else if (!requiredFloatCoord) {
> + ir::Register intCoordReg =
> ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> + ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg);
> + srcTupleData.push_back(intCoordReg);
> + } else {
> + ir::Register floatCoordReg =
> ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
> + ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg);
> + srcTupleData.push_back(floatCoordReg);
> + }
> + }
> +
> + uint32_t elemNum;
> + (void)getVectorInfo(ctx, &I, elemNum);
> + GBE_ASSERT(elemNum == 4);
> +
> + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> + const ir::Register reg = this->getRegister(&I, elemID);
> + dstTupleData.push_back(reg);
> + }
> const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
> const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
>
> @@ -3445,58 +3461,46 @@ handle_read_image:
> GBE_ASSERT(0); // never been here.
> }
>
> - bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
> -
> - ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
> - isFloatCoord, sampler, samplerOffset);
> + ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
> + requiredFloatCoord, sampler, samplerOffset);
> break;
> }
>
> case GEN_OCL_WRITE_IMAGE_I_1D:
> case GEN_OCL_WRITE_IMAGE_UI_1D:
> case GEN_OCL_WRITE_IMAGE_F_1D:
> - image_dim = 1;
> - goto handle_write_image;
> case GEN_OCL_WRITE_IMAGE_I_2D:
> case GEN_OCL_WRITE_IMAGE_UI_2D:
> case GEN_OCL_WRITE_IMAGE_F_2D:
> - image_dim = 2;
> - goto handle_write_image;
> case GEN_OCL_WRITE_IMAGE_I_3D:
> case GEN_OCL_WRITE_IMAGE_UI_3D:
> case GEN_OCL_WRITE_IMAGE_F_3D:
> - image_dim = 3;
> -handle_write_image:
> {
> - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this-
> >getRegister(*AI); ++AI;
> - const uint8_t surfaceID = ctx.getFunction().getImageSet()-
> >getIdx(surfaceReg);
> - ir::Register ucoord, vcoord, wcoord;
> -
> - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
> + const uint8_t imageID = getImageID(I);
> + GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
> + uint32_t coordNum;
> + (void)getVectorInfo(ctx, *AI, coordNum);
> + if (coordNum == 4)
> + coordNum = 3;
> + const uint32_t imageDim = coordNum;
> + vector<ir::Register> srcTupleData;
> + GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
>
> - if (image_dim > 1) {
> - GBE_ASSERT(AI != AE);
> - vcoord = this->getRegister(*AI);
> - ++AI;
> - } else
> - vcoord = ir::ocl::invalid;
> -
> - if (image_dim > 2) {
> - GBE_ASSERT(AI != AE);
> - wcoord = this->getRegister(*AI);
> - ++AI;
> - } else {
> - wcoord = ir::ocl::invalid;
> - }
> + for (uint32_t elemID = 0; elemID < 3; elemID++) {
> + ir::Register reg;
>
> - GBE_ASSERT(AI != AE);
> - vector<ir::Register> srcTupleData;
> + if (elemID < imageDim)
> + reg = this->getRegister(*AI, elemID);
> + else
> + reg = ir::ocl::invalid;
>
> - srcTupleData.push_back(ucoord);
> - srcTupleData.push_back(vcoord);
> - srcTupleData.push_back(wcoord);
> + srcTupleData.push_back(reg);
> + }
> + ++AI; GBE_ASSERT(AI != AE);
> + uint32_t elemNum;
> + (void)getVectorInfo(ctx, *AI, elemNum);
> + GBE_ASSERT(elemNum == 4);
>
> - const uint32_t elemNum = 4;
> for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> const ir::Register reg = this->getRegister(*AI, elemID);
> srcTupleData.push_back(reg);
> @@ -3523,7 +3527,7 @@ handle_write_image:
> GBE_ASSERT(0); // never been here.
> }
>
> - ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
> + ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
> break;
> }
> case GEN_OCL_MUL_HI_INT:
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 7434c78..8d55c3f 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16,
> __gen_ocl_force_simd16)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D,
> _Z21__gen_ocl_read_imageijtfj)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D,
> _Z22__gen_ocl_read_imageuijtfj)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D,
> _Z21__gen_ocl_read_imagefjtfj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D,
> _Z21__gen_ocl_read_imageijtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D,
> _Z22__gen_ocl_read_imageuijtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D,
> _Z21__gen_ocl_read_imagefjtffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D,
> _Z21__gen_ocl_read_imageijtfffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D,
> _Z22__gen_ocl_read_imageuijtfffj)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D,
> _Z21__gen_ocl_read_imagefjtfffj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D,
> _Z21__gen_ocl_read_imageijtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D,
> _Z22__gen_ocl_read_imageuijtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D,
> _Z21__gen_ocl_read_imagefjtDv2_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D,
> _Z21__gen_ocl_read_imageijtDv4_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D,
> _Z22__gen_ocl_read_imageuijtDv4_fj)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D,
> _Z21__gen_ocl_read_imagefjtDv4_fj)
> // work around read image with the LD message. The coords are integer
> type.
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I,
> _Z21__gen_ocl_read_imageijtij)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I,
> _Z22__gen_ocl_read_imageuijtij)
> DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I,
> _Z21__gen_ocl_read_imagefjtij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I,
> _Z21__gen_ocl_read_imageijtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I,
> _Z22__gen_ocl_read_imageuijtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I,
> _Z21__gen_ocl_read_imagefjtiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> _Z21__gen_ocl_read_imageijtiiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> _Z22__gen_ocl_read_imageuijtiiij)
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> _Z21__gen_ocl_read_imagefjtiiij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I,
> _Z21__gen_ocl_read_imageijtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I,
> _Z22__gen_ocl_read_imageuijtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I,
> _Z21__gen_ocl_read_imagefjtDv2_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I,
> _Z21__gen_ocl_read_imageijtDv4_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I,
> _Z22__gen_ocl_read_imageuijtDv4_ij)
> +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I,
> _Z21__gen_ocl_read_imagefjtDv4_ij)
>
> // To write_image functions.
> DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D,
> _Z22__gen_ocl_write_imageijiDv4_i)
> DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D,
> _Z23__gen_ocl_write_imageuijiDv4_j)
> DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D,
> _Z22__gen_ocl_write_imagefjiDv4_f)
>
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D,
> _Z22__gen_ocl_write_imageijiiDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D,
> _Z23__gen_ocl_write_imageuijiiDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D,
> _Z22__gen_ocl_write_imagefjiiDv4_f)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D,
> _Z22__gen_ocl_write_imageijDv2_iDv4_i)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D,
> _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D,
> _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
>
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D,
> _Z22__gen_ocl_write_imageijiiiDv4_i)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D,
> _Z23__gen_ocl_write_imageuijiiiDv4_j)
> -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D,
> _Z22__gen_ocl_write_imagefjiiiDv4_f)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D,
> _Z22__gen_ocl_write_imageijDv4_iS_)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D,
> _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
> +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D,
> _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
>
> // To get image info function
> DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH,
> __gen_ocl_get_image_width)
> diff --git a/backend/src/llvm/llvm_scalarize.cpp
> b/backend/src/llvm/llvm_scalarize.cpp
> index 5450a2b..baf526b 100644
> --- a/backend/src/llvm/llvm_scalarize.cpp
> +++ b/backend/src/llvm/llvm_scalarize.cpp
> @@ -648,7 +648,7 @@ namespace gbe {
>
> // Get the function arguments
> CallSite CS(call);
> - CallSite::arg_iterator CI = CS.arg_begin() + 2;
> + CallSite::arg_iterator CI = CS.arg_begin() + 1;
>
> switch (it->second) {
> default: break;
> @@ -661,8 +661,7 @@ namespace gbe {
> case GEN_OCL_READ_IMAGE_I_3D:
> case GEN_OCL_READ_IMAGE_UI_3D:
> case GEN_OCL_READ_IMAGE_F_3D:
> -
> - case GEN_OCL_READ_IMAGE_I_1D_I:
> + case GEN_OCL_READ_IMAGE_I_1D_I:
> case GEN_OCL_READ_IMAGE_UI_1D_I:
> case GEN_OCL_READ_IMAGE_F_1D_I:
> case GEN_OCL_READ_IMAGE_I_2D_I:
> @@ -674,6 +673,9 @@ namespace gbe {
> case GEN_OCL_GET_IMAGE_WIDTH:
> case GEN_OCL_GET_IMAGE_HEIGHT:
> {
> + ++CI;
> + if ((*CI)->getType()->isVectorTy())
> + *CI = InsertToVector(call, *CI);
> setAppendPoint(call);
> extractFromVector(call);
> break;
> @@ -681,15 +683,16 @@ namespace gbe {
> case GEN_OCL_WRITE_IMAGE_I_3D:
> case GEN_OCL_WRITE_IMAGE_UI_3D:
> case GEN_OCL_WRITE_IMAGE_F_3D:
> - CI++;
> case GEN_OCL_WRITE_IMAGE_I_2D:
> case GEN_OCL_WRITE_IMAGE_UI_2D:
> case GEN_OCL_WRITE_IMAGE_F_2D:
> - CI++;
> case GEN_OCL_WRITE_IMAGE_I_1D:
> case GEN_OCL_WRITE_IMAGE_UI_1D:
> case GEN_OCL_WRITE_IMAGE_F_1D:
> {
> + if ((*CI)->getType()->isVectorTy())
> + *CI = InsertToVector(call, *CI);
> + ++CI;
> *CI = InsertToVector(call, *CI);
> break;
> }
> --
> 1.8.3.2
More information about the Beignet mailing list