[Beignet] [PATCH V2 5/7] GBE: fix the broken 3D image support.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Sep 12 23:01:46 PDT 2013


Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/backend/gen_context.cpp   |    7 ++-
 backend/src/backend/gen_encoder.cpp   |    5 +-
 backend/src/backend/gen_encoder.hpp   |    1 +
 backend/src/llvm/llvm_gen_backend.cpp |   12 +----
 backend/src/ocl_stdlib.tmpl.h         |   89 ++++++++++++++-------------------
 5 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 4e51aff..2eee887 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1021,6 +1021,7 @@ namespace gbe
     const GenRegister vcoord = ra->genReg(insn.src(5));
     const GenRegister wcoord = ra->genReg(insn.src(6));
     uint32_t simdWidth = p->curr.execWidth;
+    uint32_t coord_cnt = 2;
     p->push();
     const uint32_t nr = msgPayload.nr;
     // prepare mesg desc and move to a0.0.
@@ -1028,9 +1029,11 @@ namespace gbe
     /* Prepare message payload. */
     p->MOV(GenRegister::f8grf(nr , 0), ucoord);
     p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
-    if (insn.src(6).reg() != 0)
+    if (insn.src(6).reg() != 0) {
       p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
-    p->SAMPLE(dst, msgPayload, false, bti, sampler, simdWidth, -1, 0);
+      coord_cnt++;
+    }
+    p->SAMPLE(dst, msgPayload, false, bti, sampler, coord_cnt, simdWidth, -1, 0);
     p->pop();
   }
 
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 35e2cee..d2d1655 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1168,15 +1168,16 @@ namespace gbe
                           bool header_present,
                           unsigned char bti,
                           unsigned char sampler,
+                          unsigned int coord_cnt,
                           uint32_t simdWidth,
                           uint32_t writemask,
                           uint32_t return_format)
   {
      if (writemask == 0) return;
-     uint32_t msg_type = (simdWidth == 16) ?
+     uint32_t msg_type =  (simdWidth == 16) ?
                             GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE : GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE;
      uint32_t response_length = (4 * (simdWidth / 8));
-     uint32_t msg_length = (2 * (simdWidth / 8));
+     uint32_t msg_length = (coord_cnt * (simdWidth / 8));
      if (header_present)
        msg_length++;
      uint32_t simd_mode = (simdWidth == 16) ?
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index d90033e..bb88484 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -168,6 +168,7 @@ namespace gbe
                 bool header_present,
                 unsigned char bti,
                 unsigned char sampler,
+                unsigned int coord_cnt,
                 unsigned int simdWidth,
                 uint32_t writemask,
                 uint32_t return_format);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 3c04565..dbd04e6 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2060,11 +2060,7 @@ namespace gbe
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
             ir::Register wcoord;
-            if (it->second == GEN_OCL_READ_IMAGE10 ||
-                it->second == GEN_OCL_READ_IMAGE11 ||
-                it->second == GEN_OCL_READ_IMAGE12 ||
-                it->second == GEN_OCL_READ_IMAGE13 ||
-                it->second == GEN_OCL_READ_IMAGE14) {
+            if (it->second >= GEN_OCL_READ_IMAGE10 && it->second <= GEN_OCL_READ_IMAGE15) {
               GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
             } else
               wcoord = ir::Register(0);
@@ -2133,11 +2129,7 @@ namespace gbe
             GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
             GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
             ir::Register wcoord;
-            if(it->second == GEN_OCL_WRITE_IMAGE10 ||
-               it->second == GEN_OCL_WRITE_IMAGE11 ||
-               it->second == GEN_OCL_WRITE_IMAGE12 ||
-               it->second == GEN_OCL_WRITE_IMAGE13 ||
-               it->second == GEN_OCL_WRITE_IMAGE14) {
+            if(it->second >= GEN_OCL_WRITE_IMAGE10 && it->second <= GEN_OCL_WRITE_IMAGE15) {
               GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
             } else
               wcoord = ir::Register(0);
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 0f91bf5..a15a4dd 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2253,37 +2253,56 @@ int __gen_ocl_get_image_depth(uint surface_id);
 #define GET_IMAGE(cl_image, surface_id) \
     uint surface_id = (uint)cl_image
 
-#define DECL_READ_IMAGE(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, sampler_t sampler, coord_type coord) \
+#define DECL_READ_IMAGE(image_type, type, suffix, coord_type) \
+  INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, sampler_t sampler, coord_type coord) \
   {\
     GET_IMAGE(cl_image, surface_id);\
-    return __gen_ocl_read_image ##suffix(surface_id, sampler, coord.s0, coord.s1);\
+    return __gen_ocl_read_image ##suffix(EXPEND_READ_COORD(surface_id, sampler, coord));\
   }
 
-#define DECL_READ_IMAGE_NOSAMPLER(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, coord_type coord) \
+#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type) \
+  INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, coord_type coord) \
   {\
     GET_IMAGE(cl_image, surface_id);\
-    return __gen_ocl_read_image ##suffix(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord.s0, coord.s1);\
+    return __gen_ocl_read_image ##suffix(EXPEND_READ_COORD(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord));\
   }
 
-#define DECL_WRITE_IMAGE(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE void write_image ##suffix(image2d_t cl_image, coord_type coord, type color)\
+#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
+  INLINE_OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\
   {\
     GET_IMAGE(cl_image, surface_id);\
-    __gen_ocl_write_image ##suffix(surface_id, coord.s0, coord.s1, color);\
+    __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
   }
 
-#define DECL_IMAGE(type, suffix)        \
-  DECL_READ_IMAGE(type, suffix, int2)   \
-  DECL_READ_IMAGE(type, suffix, float2) \
-  DECL_READ_IMAGE_NOSAMPLER(type, suffix, int2) \
-  DECL_WRITE_IMAGE(type, suffix, int2)   \
-  DECL_WRITE_IMAGE(type, suffix, float2)
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
 
-DECL_IMAGE(int4, i)
-DECL_IMAGE(uint4, ui)
-DECL_IMAGE(float4, f)
+#define DECL_IMAGE(image_type, type, suffix, n)        \
+  DECL_READ_IMAGE(image_type, type, suffix, int ##n)   \
+  DECL_READ_IMAGE(image_type, type, suffix, float ##n) \
+  DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n) \
+  DECL_WRITE_IMAGE(image_type, type, suffix, int ## n)   \
+  DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
+
+DECL_IMAGE(image2d_t, int4, i, 2)
+DECL_IMAGE(image2d_t, uint4, ui, 2)
+DECL_IMAGE(image2d_t, float4, f, 2)
+
+#undef EXPEND_READ_COORD
+#undef EXPEND_WRITE_COORD
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
+
+DECL_IMAGE(image3d_t, int4, i, 4)
+DECL_IMAGE(image3d_t, uint4, ui, 4)
+DECL_IMAGE(image3d_t, float4, f, 4)
+
+DECL_IMAGE(image3d_t, int4, i, 3)
+DECL_IMAGE(image3d_t, uint4, ui, 3)
+DECL_IMAGE(image3d_t, float4, f, 3)
+#undef EXPEND_READ_COORD
+#undef EXPEND_WRITE_COORD
 
 #undef DECL_IMAGE
 #undef DECL_READ_IMAGE
@@ -2346,40 +2365,6 @@ INLINE_OVERLOADABLE  size_t get_image_array_size(image1d_array_t image)
   { return __gen_ocl_get_image_array_size(image); }
 #endif
 
-#define DECL_READ_IMAGE(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, sampler_t sampler, coord_type coord) \
-  {\
-    GET_IMAGE(cl_image, surface_id);\
-    return __gen_ocl_read_image ## suffix(surface_id, (uint)sampler, coord.s0, coord.s1, coord.s2);\
-  }
-
-#define DECL_READ_IMAGE_NOSAMPLER(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, coord_type coord) \
-  {\
-    GET_IMAGE(cl_image, surface_id);\
-    return __gen_ocl_read_image ## suffix(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord.s0, coord.s1, coord.s2);\
-  }
-
-#define DECL_WRITE_IMAGE(type, suffix, coord_type) \
-  INLINE_OVERLOADABLE void write_image ## suffix(image3d_t cl_image, coord_type coord, type color)\
-  {\
-    GET_IMAGE(cl_image, surface_id);\
-    __gen_ocl_write_image ## suffix(surface_id, coord.s0, coord.s1, coord.s2, color);\
-  }
-
-#define DECL_IMAGE(type, suffix)        \
-  DECL_READ_IMAGE(type, suffix, int4)   \
-  DECL_READ_IMAGE(type, suffix, float4) \
-  DECL_READ_IMAGE_NOSAMPLER(type, suffix, int4) \
-  DECL_WRITE_IMAGE(type, suffix, int4)   \
-  DECL_WRITE_IMAGE(type, suffix, float4)
-
-DECL_IMAGE(int4, i)
-DECL_IMAGE(uint4, ui)
-DECL_IMAGE(float4, f)
-
-
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : disable
 
 #undef DECL_IMAGE
-- 
1.7.9.5



More information about the Beignet mailing list