[Beignet] [PATCH 1/2] GBE: optimize read_image to avoid get sampler info dynamically.
Yang, Rong R
rong.r.yang at intel.com
Sun Feb 23 21:22:47 PST 2014
The patchset LGTM. Thanks.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Friday, February 21, 2014 1:11 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH 1/2] GBE: optimize read_image to avoid get sampler info dynamically.
Most of the time, the user passes a constant sampler value directly in the kernel, so we don't need to fetch the sampler value through a function call. This way, the compiler front end can optimize much better than it can when the sampler information is fetched dynamically. For LuxMark's median/simple case, this patch gives about a 30-45% performance gain.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/ocl_stdlib.tmpl.h | 13 ++++++-------
src/cl_kernel.c | 3 +++
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 307f562..cde9cfb 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4521,23 +4521,21 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler);
#define DECL_READ_IMAGE(float_coord_rounding_fix, int_clamping_fix, \
image_type, type, suffix, coord_type) \
INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- sampler_t sampler, \
+ const sampler_t sampler, \
coord_type coord) \
{ \
GET_IMAGE(cl_image, surface_id); \
coord_type tmpCoord = coord; \
- ushort samplerValue; \
if (float_coord_rounding_fix | int_clamping_fix) { \
- samplerValue = __gen_ocl_get_sampler_info(sampler); \
- if (((samplerValue & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
- && ((samplerValue & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
+ if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
+ && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
if (float_coord_rounding_fix \
- && ((samplerValue & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
+ && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
FIXUP_FLOAT_COORD(tmpCoord); \
} \
if (int_clamping_fix) { \
if (OUT_OF_BOX(tmpCoord, surface_id, \
- (samplerValue & CLK_NORMALIZED_COORDS_TRUE))) { \
+ (sampler & CLK_NORMALIZED_COORDS_TRUE))) { \
unsigned int border_alpha; \
int order = __gen_ocl_get_image_channel_order(surface_id); \
if (!CLK_HAS_ALPHA(order)) { \
@@ -4555,6 +4553,7 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler);
EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\
}
+
#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type) \
INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \
coord_type coord) \
diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 6a0c8e6..ecb5e95 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -172,6 +172,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
k->args[index].mem = NULL;
k->args[index].sampler = sampler;
cl_set_sampler_arg_slot(k, index, sampler);
+ offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+ assert(offset + 2 <= k->curbe_sz);
+ memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
return CL_SUCCESS;
}
--
1.8.3.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list