[Beignet] [PATCH 2/3] Backend: Optimization use more native math
Grigore Lupescu
grigore.lupescu at intel.com
Mon May 2 04:22:22 UTC 2016
From: Grigore Lupescu <grigore.lupescu at intel.com>
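Optimization for exp10, log2, log and log10: use the native/fast-path
implementations for input ranges where they have enough precision.
The small-range shortcut for exp moves from __gen_ocl_internal_exp into exp,
and native_exp10 now uses __gen_ocl_exp with the new M_LOG210_F constant
instead of __gen_ocl_pow.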
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/libocl/include/ocl_float.h | 1 +
backend/src/libocl/tmpl/ocl_math.tmpl.cl | 30 +++++++++++++++++++++++-------
2 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index e63eaf9..6be6c7c 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -81,6 +81,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
#define M_E_F 2.718281828459045F
#define M_LOG2E_F 1.4426950408889634F
#define M_LOG10E_F 0.43429448190325176F
+#define M_LOG210_F 3.3219280948873626F
#define M_LN2_F 0.6931471805599453F
#define M_LN10_F 2.302585092994046F
#define M_PI_F 3.141592653589793F
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 782bfd2..6460755 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -57,7 +57,7 @@ OVERLOADABLE float native_tan(float x) {
}
OVERLOADABLE float native_exp2(float x) { return __gen_ocl_exp(x); }
OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(M_LOG2E_F*x); }
-OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
+OVERLOADABLE float native_exp10(float x) { return __gen_ocl_exp(M_LOG210_F*x); }
OVERLOADABLE float native_divide(float x, float y) { return x/y; }
/* Fast path */
@@ -257,6 +257,7 @@ OVERLOADABLE float __gen_ocl_internal_log10(float x) {
* is preserved.
* ====================================================
*/
+
union {float f; unsigned i; }u;
const float
zero = 0.0,
@@ -1666,12 +1667,6 @@ OVERLOADABLE float __gen_ocl_internal_rint(float x) {
}
OVERLOADABLE float __gen_ocl_internal_exp(float x) {
- //use native instruction when it has enough precision
- if (x > -0x1.6p1 && x < 0x1.6p1)
- {
- return native_exp(x);
- }
-
float o_threshold = 8.8721679688e+01, /* 0x42b17180 */
u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */
twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
@@ -3527,6 +3522,10 @@ OVERLOADABLE float log(float x) {
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_log(x);
+ /* Use native/faster instruction when it has enough precision */
+ if(x > 0x1.1p0)
+ return __gen_ocl_internal_fastpath_log(x);
+
return __gen_ocl_internal_log(x);
}
@@ -3534,6 +3533,10 @@ OVERLOADABLE float log2(float x) {
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_log2(x);
+ /* Use native/faster instruction when it has enough precision */
+ if(x > 0x1.1p0)
+ return __gen_ocl_internal_fastpath_log2(x);
+
return __gen_ocl_internal_log2(x);
}
@@ -3541,6 +3544,10 @@ OVERLOADABLE float log10(float x) {
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_log10(x);
+ /* Use native/faster instruction when it has enough precision */
+ if(x > 0x1.1p0)
+ return __gen_ocl_internal_fastpath_log10(x);
+
return __gen_ocl_internal_log10(x);
}
@@ -3548,10 +3555,15 @@ OVERLOADABLE float exp(float x) {
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_exp(x);
+ /* Use native/faster instruction when it has enough precision */
+ if (x > -0x1.6p1 && x < 0x1.6p1)
+ return __gen_ocl_internal_fastpath_exp(x);
+
return __gen_ocl_internal_exp(x);
}
OVERLOADABLE float exp2(float x) {
+ /* For exp2 the native instruction is always used (treated as precise enough over the whole range) */
return native_exp2(x);
}
@@ -3559,6 +3571,10 @@ OVERLOADABLE float exp10(float x) {
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_exp10(x);
+ /* Use native/faster instruction when it has enough precision */
+ if((x < -0x1.4p+5) || (x > +0x1.4p+5))
+ return __gen_ocl_internal_fastpath_exp10(x);
+
return __gen_ocl_internal_exp10(x);
}
--
2.5.0
More information about the Beignet
mailing list