[Beignet] [PATCH 1/2] GBE: optimize read_image to avoid getting sampler info dynamically.

Zhigang Gong zhigang.gong at intel.com
Thu Feb 20 21:11:11 PST 2014


Most of the time, the user passes a constant sampler value directly in the
kernel, so we don't need to fetch the sampler value through a function
call. This also lets the compiler front end optimize much better than it can
when the sampler information is retrieved dynamically. For LuxMark's
median/simple case, this patch yields about a 30-45% performance gain.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/ocl_stdlib.tmpl.h | 13 ++++++-------
 src/cl_kernel.c               |  3 +++
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 307f562..cde9cfb 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4521,23 +4521,21 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler);
 #define DECL_READ_IMAGE(float_coord_rounding_fix, int_clamping_fix,          \
                         image_type, type, suffix, coord_type)                \
   INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
-                                               sampler_t sampler,            \
+                                               const sampler_t sampler,      \
                                                coord_type coord)             \
   {                                                                          \
     GET_IMAGE(cl_image, surface_id);                                         \
     coord_type tmpCoord = coord;                                             \
-    ushort samplerValue;                                                     \
     if (float_coord_rounding_fix | int_clamping_fix) {                       \
-      samplerValue = __gen_ocl_get_sampler_info(sampler);                    \
-      if (((samplerValue & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)         \
-          && ((samplerValue & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {   \
+      if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)         \
+          && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) {   \
         if (float_coord_rounding_fix                                         \
-            && ((samplerValue & CLK_NORMALIZED_COORDS_TRUE) == 0)) {         \
+            && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) {         \
           FIXUP_FLOAT_COORD(tmpCoord);                                       \
         }                                                                    \
         if (int_clamping_fix) {                                              \
            if (OUT_OF_BOX(tmpCoord, surface_id,                              \
-                          (samplerValue & CLK_NORMALIZED_COORDS_TRUE))) {    \
+                          (sampler & CLK_NORMALIZED_COORDS_TRUE))) {    \
             unsigned int border_alpha;                                       \
             int order = __gen_ocl_get_image_channel_order(surface_id);       \
             if (!CLK_HAS_ALPHA(order)) {                                     \
@@ -4555,6 +4553,7 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler);
                         EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
   }
 
+
 #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type)      \
   INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image,          \
                                                coord_type coord)             \
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 6a0c8e6..ecb5e95 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -172,6 +172,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
     k->args[index].mem = NULL;
     k->args[index].sampler = sampler;
     cl_set_sampler_arg_slot(k, index, sampler);
+    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+    assert(offset + 2 <= k->curbe_sz);
+    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
     return CL_SUCCESS;
   }
 
-- 
1.8.3.2



More information about the Beignet mailing list