[Beignet] [PATCH 4/4] GBE: code cleanup for read_image/write_image.
Yang, Rong R
rong.r.yang at intel.com
Sun Feb 23 20:03:34 PST 2014
The patchset LGTM. Thanks.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Wednesday, February 19, 2014 8:38 AM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH 4/4] GBE: code cleanup for read_image/write_image.
Remove some useless instructions and make the read/write_image more readable.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/llvm/llvm_gen_backend.cpp | 133 +++++++++-------------------
backend/src/llvm/llvm_gen_ocl_function.hxx | 42 ++++-----
backend/src/llvm/llvm_scalarize.cpp | 36 +++-----
backend/src/ocl_stdlib.tmpl.h | 12 ---
4 files changed, 69 insertions(+), 154 deletions(-)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 2d4fb0a..4eecae5 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2092,31 +2092,19 @@ namespace gbe
case GEN_OCL_LGBARRIER:
ctx.getFunction().setUseSLM(true);
break;
- case GEN_OCL_WRITE_IMAGE0:
- case GEN_OCL_WRITE_IMAGE1:
- case GEN_OCL_WRITE_IMAGE2:
- case GEN_OCL_WRITE_IMAGE3:
- case GEN_OCL_WRITE_IMAGE4:
- case GEN_OCL_WRITE_IMAGE5:
- case GEN_OCL_WRITE_IMAGE10:
- case GEN_OCL_WRITE_IMAGE11:
- case GEN_OCL_WRITE_IMAGE12:
- case GEN_OCL_WRITE_IMAGE13:
- case GEN_OCL_WRITE_IMAGE14:
- case GEN_OCL_WRITE_IMAGE15:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_I_3D:
+ case GEN_OCL_WRITE_IMAGE_UI_3D:
+ case GEN_OCL_WRITE_IMAGE_F_3D:
break;
- case GEN_OCL_READ_IMAGE0:
- case GEN_OCL_READ_IMAGE1:
- case GEN_OCL_READ_IMAGE2:
- case GEN_OCL_READ_IMAGE3:
- case GEN_OCL_READ_IMAGE4:
- case GEN_OCL_READ_IMAGE5:
- case GEN_OCL_READ_IMAGE10:
- case GEN_OCL_READ_IMAGE11:
- case GEN_OCL_READ_IMAGE12:
- case GEN_OCL_READ_IMAGE13:
- case GEN_OCL_READ_IMAGE14:
- case GEN_OCL_READ_IMAGE15:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_I_3D:
+ case GEN_OCL_READ_IMAGE_UI_3D:
+ case GEN_OCL_READ_IMAGE_F_3D:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
@@ -2384,18 +2372,12 @@ namespace gbe
ctx.GET_SAMPLER_INFO(reg, ir::ocl::samplerinfo, index);
break;
}
- case GEN_OCL_READ_IMAGE0:
- case GEN_OCL_READ_IMAGE1:
- case GEN_OCL_READ_IMAGE2:
- case GEN_OCL_READ_IMAGE3:
- case GEN_OCL_READ_IMAGE4:
- case GEN_OCL_READ_IMAGE5:
- case GEN_OCL_READ_IMAGE10:
- case GEN_OCL_READ_IMAGE11:
- case GEN_OCL_READ_IMAGE12:
- case GEN_OCL_READ_IMAGE13:
- case GEN_OCL_READ_IMAGE14:
- case GEN_OCL_READ_IMAGE15:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_I_3D:
+ case GEN_OCL_READ_IMAGE_UI_3D:
+ case GEN_OCL_READ_IMAGE_F_3D:
{
GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
@@ -2407,7 +2389,7 @@ namespace gbe
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
ir::Register wcoord;
bool is3D = false;
- if (it->second >= GEN_OCL_READ_IMAGE10 && it->second <= GEN_OCL_READ_IMAGE15) {
+ if (it->second >= GEN_OCL_READ_IMAGE_I_3D) {
GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
is3D = true;
} else
@@ -2433,52 +2415,33 @@ namespace gbe
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
- ir::Type srcType = ir::TYPE_S32, dstType = ir::TYPE_U32;
+ ir::Type dstType = ir::TYPE_U32;
switch(it->second) {
- case GEN_OCL_READ_IMAGE0:
- case GEN_OCL_READ_IMAGE2:
- case GEN_OCL_READ_IMAGE10:
- case GEN_OCL_READ_IMAGE12:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_I_3D:
+ case GEN_OCL_READ_IMAGE_UI_3D:
dstType = ir::TYPE_U32;
- srcType = ir::TYPE_S32;
break;
- case GEN_OCL_READ_IMAGE1:
- case GEN_OCL_READ_IMAGE3:
- case GEN_OCL_READ_IMAGE11:
- case GEN_OCL_READ_IMAGE13:
- dstType = ir::TYPE_U32;
- srcType = ir::TYPE_FLOAT;
- break;
- case GEN_OCL_READ_IMAGE4:
- case GEN_OCL_READ_IMAGE14:
+ case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_F_3D:
dstType = ir::TYPE_FLOAT;
- srcType = ir::TYPE_S32;
- break;
- case GEN_OCL_READ_IMAGE5:
- case GEN_OCL_READ_IMAGE15:
- srcType = dstType = ir::TYPE_FLOAT;
break;
default:
GBE_ASSERT(0); // never been here.
}
ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
- srcType == ir::TYPE_FLOAT, sampler, samplerOffset, is3D);
+ true, sampler, samplerOffset, is3D);
break;
}
- case GEN_OCL_WRITE_IMAGE0:
- case GEN_OCL_WRITE_IMAGE1:
- case GEN_OCL_WRITE_IMAGE2:
- case GEN_OCL_WRITE_IMAGE3:
- case GEN_OCL_WRITE_IMAGE4:
- case GEN_OCL_WRITE_IMAGE5:
- case GEN_OCL_WRITE_IMAGE10:
- case GEN_OCL_WRITE_IMAGE11:
- case GEN_OCL_WRITE_IMAGE12:
- case GEN_OCL_WRITE_IMAGE13:
- case GEN_OCL_WRITE_IMAGE14:
- case GEN_OCL_WRITE_IMAGE15:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_I_3D:
+ case GEN_OCL_WRITE_IMAGE_UI_3D:
+ case GEN_OCL_WRITE_IMAGE_F_3D:
{
GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
@@ -2486,7 +2449,7 @@ namespace gbe
GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
ir::Register wcoord;
bool is3D = false;
- if(it->second >= GEN_OCL_WRITE_IMAGE10 && it->second <= GEN_OCL_WRITE_IMAGE15) {
+ if(it->second >= GEN_OCL_WRITE_IMAGE_I_3D) {
GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
is3D = true;
} else
@@ -2505,36 +2468,24 @@ namespace gbe
}
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
- ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
+ ir::Type srcType = ir::TYPE_U32;
switch(it->second) {
- case GEN_OCL_WRITE_IMAGE0:
- case GEN_OCL_WRITE_IMAGE2:
- case GEN_OCL_WRITE_IMAGE10:
- case GEN_OCL_WRITE_IMAGE12:
- srcType = coordType = ir::TYPE_U32;
- break;
- case GEN_OCL_WRITE_IMAGE1:
- case GEN_OCL_WRITE_IMAGE3:
- case GEN_OCL_WRITE_IMAGE11:
- case GEN_OCL_WRITE_IMAGE13:
- coordType = ir::TYPE_FLOAT;
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_I_3D:
+ case GEN_OCL_WRITE_IMAGE_UI_3D:
srcType = ir::TYPE_U32;
break;
- case GEN_OCL_WRITE_IMAGE4:
- case GEN_OCL_WRITE_IMAGE14:
+ case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_F_3D:
srcType = ir::TYPE_FLOAT;
- coordType = ir::TYPE_U32;
- break;
- case GEN_OCL_WRITE_IMAGE5:
- case GEN_OCL_WRITE_IMAGE15:
- srcType = coordType = ir::TYPE_FLOAT;
break;
default:
GBE_ASSERT(0); // never been here.
}
- ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, coordType, is3D);
+ ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32,
+ is3D);
break;
}
case GEN_OCL_MUL_HI_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index de2890c..750344a 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -44,34 +44,22 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
// To read_image functions.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE0, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE1, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE2, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjtffj)
-
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, _Z21__gen_ocl_read_imageijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, _Z22__gen_ocl_read_imageuijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, _Z21__gen_ocl_read_imagefjtffj)
+
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
// To write_image functions.
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE1, _Z22__gen_ocl_write_imageijffDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE4, _Z22__gen_ocl_write_imagefjiiDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j)
-
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE10, _Z22__gen_ocl_write_imageijiiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE11, _Z22__gen_ocl_write_imageijfffDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE12, _Z23__gen_ocl_write_imageuijiiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE13, _Z23__gen_ocl_write_imageuijfffDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE14, _Z22__gen_ocl_write_imagefjiiiDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE15, _Z22__gen_ocl_write_imagefjfffDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, _Z23__gen_ocl_write_imageuijiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, _Z22__gen_ocl_write_imagefjiiDv4_f)
+
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f)
// To get image info function
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width)
@@ -181,4 +169,4 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati)
DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32)
-DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
\ No newline at end of file
+DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index c1790f7..7095473 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -643,18 +643,12 @@ namespace gbe {
switch (it->second) {
default: break;
- case GEN_OCL_READ_IMAGE0:
- case GEN_OCL_READ_IMAGE1:
- case GEN_OCL_READ_IMAGE2:
- case GEN_OCL_READ_IMAGE3:
- case GEN_OCL_READ_IMAGE4:
- case GEN_OCL_READ_IMAGE5:
- case GEN_OCL_READ_IMAGE10:
- case GEN_OCL_READ_IMAGE11:
- case GEN_OCL_READ_IMAGE12:
- case GEN_OCL_READ_IMAGE13:
- case GEN_OCL_READ_IMAGE14:
- case GEN_OCL_READ_IMAGE15:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_I_3D:
+ case GEN_OCL_READ_IMAGE_UI_3D:
+ case GEN_OCL_READ_IMAGE_F_3D:
case GEN_OCL_GET_IMAGE_WIDTH:
case GEN_OCL_GET_IMAGE_HEIGHT:
{
@@ -662,19 +656,13 @@ namespace gbe {
extractFromVector(call);
break;
}
- case GEN_OCL_WRITE_IMAGE10:
- case GEN_OCL_WRITE_IMAGE11:
- case GEN_OCL_WRITE_IMAGE12:
- case GEN_OCL_WRITE_IMAGE13:
- case GEN_OCL_WRITE_IMAGE14:
- case GEN_OCL_WRITE_IMAGE15:
+ case GEN_OCL_WRITE_IMAGE_I_3D:
+ case GEN_OCL_WRITE_IMAGE_UI_3D:
+ case GEN_OCL_WRITE_IMAGE_F_3D:
CI++;
- case GEN_OCL_WRITE_IMAGE0:
- case GEN_OCL_WRITE_IMAGE1:
- case GEN_OCL_WRITE_IMAGE2:
- case GEN_OCL_WRITE_IMAGE3:
- case GEN_OCL_WRITE_IMAGE4:
- case GEN_OCL_WRITE_IMAGE5:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
{
*CI = InsertToVector(call, *CI);
break;
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 4de93d3..e78deb3 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4232,33 +4232,21 @@ int __gen_ocl_force_simd16(void);
// Image access functions
/////////////////////////////////////////////////////////////////////////////
-//OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-//OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-//OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-//OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-//OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-//OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
-//OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, int4 color);
OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
-//OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
-//OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float4 color);
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
-//OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, float w, int4 color);
OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
-//OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, float w, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
-//OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float w, float4 color);
int __gen_ocl_get_image_width(uint surface_id);
int __gen_ocl_get_image_height(uint surface_id);
int __gen_ocl_get_image_channel_data_type(uint surface_id);
--
1.7.9.5
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list